mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 17:30:12 +01:00
ac,radv: move mesh_fast_launch_2 to ac
To be shared with radeonsi. Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35931>
This commit is contained in:
parent
09e6bc90ee
commit
d9df597042
12 changed files with 25 additions and 25 deletions
|
|
@ -1804,6 +1804,8 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
|
|||
|
||||
set_custom_cu_en_mask(info);
|
||||
|
||||
info->mesh_fast_launch_2 = info->gfx_level >= GFX11;
|
||||
|
||||
const char *ib_filename = debug_get_option("AMD_PARSE_IB", NULL);
|
||||
if (ib_filename) {
|
||||
FILE *f = fopen(ib_filename, "r");
|
||||
|
|
@ -1967,6 +1969,7 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f)
|
|||
fprintf(f, " has_set_sh_pairs_packed = %i\n", info->has_set_sh_pairs_packed);
|
||||
fprintf(f, " has_set_uconfig_pairs = %i\n", info->has_set_uconfig_pairs);
|
||||
fprintf(f, " conformant_trunc_coord = %i\n", info->conformant_trunc_coord);
|
||||
fprintf(f, " mesh_fast_launch_2 = %i\n", info->mesh_fast_launch_2);
|
||||
|
||||
if (info->gfx_level < GFX12) {
|
||||
fprintf(f, "Display features:\n");
|
||||
|
|
|
|||
|
|
@ -156,6 +156,8 @@ struct radeon_info {
|
|||
* AnisoPoint is treated as Point.
|
||||
*/
|
||||
bool conformant_trunc_coord;
|
||||
/* Support GS_FAST_LAUNCH(2) for mesh shaders. */
|
||||
bool mesh_fast_launch_2;
|
||||
|
||||
/* Display features. */
|
||||
/* There are 2 display DCC codepaths, because display expects unaligned DCC. */
|
||||
|
|
|
|||
|
|
@ -231,8 +231,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader,
|
|||
unsigned wave_size,
|
||||
unsigned workgroup_size,
|
||||
bool multiview,
|
||||
bool has_query,
|
||||
bool fast_launch_2);
|
||||
bool has_query);
|
||||
|
||||
bool
|
||||
ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
|
||||
|
|
|
|||
|
|
@ -1342,8 +1342,7 @@ ac_nir_lower_ngg_mesh(nir_shader *shader,
|
|||
unsigned wave_size,
|
||||
unsigned hw_workgroup_size,
|
||||
bool multiview,
|
||||
bool has_query,
|
||||
bool fast_launch_2)
|
||||
bool has_query)
|
||||
{
|
||||
unsigned vertices_per_prim =
|
||||
mesa_vertices_per_prim(shader->info.mesh.primitive_type);
|
||||
|
|
@ -1381,6 +1380,8 @@ ac_nir_lower_ngg_mesh(nir_shader *shader,
|
|||
shader->info.workgroup_size[1] *
|
||||
shader->info.workgroup_size[2];
|
||||
|
||||
bool fast_launch_2 = hw_info->mesh_fast_launch_2;
|
||||
|
||||
lower_ngg_ms_state state = {
|
||||
.layout = layout,
|
||||
.wave_size = wave_size,
|
||||
|
|
|
|||
|
|
@ -2575,7 +2575,7 @@ radv_emit_mesh_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
radeon_set_uconfig_reg_idx(&pdev->info, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST);
|
||||
radeon_end();
|
||||
|
||||
if (pdev->mesh_fast_launch_2)
|
||||
if (pdev->info.mesh_fast_launch_2)
|
||||
radv_gfx11_emit_meshlet(cmd_buffer, ms);
|
||||
|
||||
radv_emit_vgt_gs_out(cmd_buffer, gs_out);
|
||||
|
|
@ -2868,7 +2868,7 @@ radv_emit_vgt_shader_config_gfx6(struct radv_cmd_buffer *cmd_buffer, const struc
|
|||
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
|
||||
} else if (key->mesh) {
|
||||
assert(!key->ngg_passthrough);
|
||||
unsigned gs_fast_launch = pdev->mesh_fast_launch_2 ? 2 : 1;
|
||||
unsigned gs_fast_launch = pdev->info.mesh_fast_launch_2 ? 2 : 1;
|
||||
stages |=
|
||||
S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(gs_fast_launch) | S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring);
|
||||
} else if (key->ngg) {
|
||||
|
|
@ -9727,7 +9727,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
|
|||
uint32_t draw_id_enable = !!cmd_buffer->state.uses_drawid;
|
||||
uint32_t draw_id_reg = !draw_id_enable ? 0 : (base_reg + (xyz_dim_enable ? 12 : 0) - SI_SH_REG_OFFSET) >> 2;
|
||||
|
||||
uint32_t mode1_enable = !pdev->mesh_fast_launch_2;
|
||||
uint32_t mode1_enable = !pdev->info.mesh_fast_launch_2;
|
||||
|
||||
radeon_begin(cs);
|
||||
radeon_emit(PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM_S(1));
|
||||
|
|
@ -9814,7 +9814,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, cons
|
|||
|
||||
uint32_t xyz_dim_en = mesh_shader->info.cs.uses_grid_size;
|
||||
uint32_t xyz_dim_reg = !xyz_dim_en ? 0 : (cmd_state->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2;
|
||||
uint32_t mode1_en = !pdev->mesh_fast_launch_2;
|
||||
uint32_t mode1_en = !pdev->info.mesh_fast_launch_2;
|
||||
uint32_t linear_dispatch_en = cmd_state->shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch;
|
||||
const bool sqtt_en = !!device->sqtt.bo;
|
||||
|
||||
|
|
@ -10143,7 +10143,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x
|
|||
|
||||
radv_emit_userdata_mesh(cmd_buffer, x, y, z);
|
||||
|
||||
if (pdev->mesh_fast_launch_2) {
|
||||
if (pdev->info.mesh_fast_launch_2) {
|
||||
if (!view_mask) {
|
||||
radv_cs_emit_mesh_dispatch_packet(cmd_buffer, x, y, z);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -277,7 +277,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
|
|||
*ace_cmd_size += 6 * 4;
|
||||
} else {
|
||||
/* userdata writes + instance count + non-indexed draw */
|
||||
*cmd_size += (6 + 2 + (pdev->mesh_fast_launch_2 ? 5 : 3)) * 4;
|
||||
*cmd_size += (6 + 2 + (pdev->info.mesh_fast_launch_2 ? 5 : 3)) * 4;
|
||||
}
|
||||
} else {
|
||||
/* userdata writes + instance count + non-indexed draw */
|
||||
|
|
@ -2105,7 +2105,7 @@ dgc_emit_dispatch_taskmesh_gfx(struct dgc_cmdbuf *cs, nir_def *sequence_id)
|
|||
nir_def *ring_entry_reg = load_param16(b, mesh_ring_entry_sgpr);
|
||||
|
||||
nir_def *xyz_dim_enable = nir_bcsel(b, has_grid_size, nir_imm_int(b, S_4D1_XYZ_DIM_ENABLE(1)), nir_imm_int(b, 0));
|
||||
nir_def *mode1_enable = nir_imm_int(b, S_4D1_MODE1_ENABLE(!pdev->mesh_fast_launch_2));
|
||||
nir_def *mode1_enable = nir_imm_int(b, S_4D1_MODE1_ENABLE(!pdev->info.mesh_fast_launch_2));
|
||||
nir_def *linear_dispatch_en =
|
||||
nir_bcsel(b, has_linear_dispatch_en, nir_imm_int(b, S_4D1_LINEAR_DISPATCH_ENABLE(1)), nir_imm_int(b, 0));
|
||||
nir_def *sqtt_enable = nir_imm_int(b, device->sqtt.bo ? S_4D1_THREAD_TRACE_MARKER_ENABLE(1) : 0);
|
||||
|
|
@ -2154,7 +2154,7 @@ dgc_emit_draw_mesh_tasks_gfx(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_de
|
|||
dgc_emit_userdata_mesh(cs, x, y, z, sequence_id);
|
||||
dgc_emit_instance_count(cs, nir_imm_int(b, 1));
|
||||
|
||||
if (pdev->mesh_fast_launch_2) {
|
||||
if (pdev->info.mesh_fast_launch_2) {
|
||||
dgc_emit_dispatch_mesh_direct(cs, x, y, z);
|
||||
} else {
|
||||
nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z));
|
||||
|
|
@ -2220,7 +2220,7 @@ dgc_emit_draw_mesh_tasks_with_count_gfx(struct dgc_cmdbuf *cs, nir_def *stream_a
|
|||
nir_ior(b, nir_iand_imm(b, xyz_dim_reg, 0xFFFF), nir_ishl_imm(b, nir_iand_imm(b, draw_id_reg, 0xFFFF), 16)));
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
dgc_cs_emit(nir_ior_imm(b, nir_ior(b, draw_index_enable, xyz_dim_enable),
|
||||
S_4C2_MODE1_ENABLE(!pdev->mesh_fast_launch_2)));
|
||||
S_4C2_MODE1_ENABLE(!pdev->info.mesh_fast_launch_2)));
|
||||
} else {
|
||||
dgc_cs_emit(draw_index_enable);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2259,8 +2259,6 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
|
|||
|
||||
pdev->emulate_ngg_gs_query_pipeline_stat = pdev->use_ngg && pdev->info.gfx_level < GFX11;
|
||||
|
||||
pdev->mesh_fast_launch_2 = pdev->info.gfx_level >= GFX11;
|
||||
|
||||
pdev->emulate_mesh_shader_queries = pdev->info.gfx_level == GFX10_3;
|
||||
|
||||
/* Determine the number of threads per wave for all stages. */
|
||||
|
|
|
|||
|
|
@ -116,9 +116,6 @@ struct radv_physical_device {
|
|||
/* Whether to emulate the number of primitives generated by GS. */
|
||||
bool emulate_ngg_gs_query_pipeline_stat;
|
||||
|
||||
/* Whether to use GS_FAST_LAUNCH(2) for mesh shaders. */
|
||||
bool mesh_fast_launch_2;
|
||||
|
||||
/* Whether to emulate mesh/task shader queries. */
|
||||
bool emulate_mesh_shader_queries;
|
||||
|
||||
|
|
|
|||
|
|
@ -2653,7 +2653,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
active_nir_stages |= mesa_to_vk_shader_stage(i);
|
||||
}
|
||||
|
||||
if (!pdev->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
|
||||
if (!pdev->info.mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
|
||||
BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
|
||||
nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
|
||||
nir_shader *task = stages[MESA_SHADER_TASK].nir;
|
||||
|
|
|
|||
|
|
@ -518,7 +518,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
|
|||
/* Mesh shaders run as NGG which can implement local_invocation_index from
|
||||
* the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3.
|
||||
*/
|
||||
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !pdev->mesh_fast_launch_2,
|
||||
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !pdev->info.mesh_fast_launch_2,
|
||||
.lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE &&
|
||||
((((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
|
||||
(nir->info.workgroup_size[2] == 1)) == 2) ||
|
||||
|
|
@ -831,7 +831,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
|
|||
bool scratch_ring = false;
|
||||
NIR_PASS(_, nir, ac_nir_lower_ngg_mesh, &pdev->info, options.export_clipdist_mask, options.vs_output_param_offset,
|
||||
options.has_param_exports, &scratch_ring, info->wave_size, hw_workgroup_size,
|
||||
gfx_state->has_multiview_view_index, info->ms.has_query, pdev->mesh_fast_launch_2);
|
||||
gfx_state->has_multiview_view_index, info->ms.has_query);
|
||||
ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring;
|
||||
} else {
|
||||
unreachable("invalid SW stage passed to radv_lower_ngg");
|
||||
|
|
@ -1676,7 +1676,7 @@ radv_precompute_registers_hw_ms(struct radv_device *device, struct radv_shader_b
|
|||
|
||||
radv_precompute_registers_hw_ngg(device, &binary->config, &binary->info);
|
||||
|
||||
info->regs.vgt_gs_max_vert_out = pdev->mesh_fast_launch_2 ? info->ngg_info.max_out_verts : info->workgroup_size;
|
||||
info->regs.vgt_gs_max_vert_out = pdev->info.mesh_fast_launch_2 ? info->ngg_info.max_out_verts : info->workgroup_size;
|
||||
|
||||
info->regs.ms.spi_shader_gs_meshlet_dim = S_00B2B0_MESHLET_NUM_THREAD_X(info->cs.block_size[0] - 1) |
|
||||
S_00B2B0_MESHLET_NUM_THREAD_Y(info->cs.block_size[1] - 1) |
|
||||
|
|
|
|||
|
|
@ -248,7 +248,7 @@ declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args
|
|||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (pdev->mesh_fast_launch_2) {
|
||||
if (pdev->info.mesh_fast_launch_2) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids_packed);
|
||||
} else {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
|
||||
|
|
@ -796,7 +796,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
|
|||
add_ud_arg(args, 1, AC_ARG_INT, &args->ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA);
|
||||
}
|
||||
|
||||
if (previous_stage != MESA_SHADER_MESH || !pdev->mesh_fast_launch_2) {
|
||||
if (previous_stage != MESA_SHADER_MESH || !pdev->info.mesh_fast_launch_2) {
|
||||
if (gfx_level >= GFX12) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
|
||||
|
|
|
|||
|
|
@ -909,7 +909,7 @@ calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir
|
|||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
|
||||
|
||||
if (pdev->mesh_fast_launch_2) {
|
||||
if (pdev->info.mesh_fast_launch_2) {
|
||||
/* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */
|
||||
info->workgroup_size = api_workgroup_size;
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue