From 9fd36bbacd70a50eeadc1910ab125cfa6531b70d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Wed, 11 Aug 2021 08:54:28 +0200 Subject: [PATCH] radv: Calculate workgroup sizes in radv_pipeline. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Timur Kristóf Reviewed-by: Daniel Schürmann Part-of: --- src/amd/vulkan/radv_pipeline.c | 63 +++++++++++++++++++++++++++++----- src/amd/vulkan/radv_shader.h | 1 + 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index ce211086d9d..ae79894ddcd 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1842,11 +1842,12 @@ gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline { struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY]; struct radv_es_output_info *es_info; + bool has_tess = !!nir[MESA_SHADER_TESS_CTRL]; if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) - es_info = nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info; + es_info = has_tess ? &gs_info->tes.es_info : &gs_info->vs.es_info; else - es_info = nir[MESA_SHADER_TESS_CTRL] ? &infos[MESA_SHADER_TESS_EVAL].tes.es_info - : &infos[MESA_SHADER_VERTEX].vs.es_info; + es_info = has_tess ? &infos[MESA_SHADER_TESS_EVAL].tes.es_info + : &infos[MESA_SHADER_VERTEX].vs.es_info; unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1); bool uses_adjacency; @@ -1949,6 +1950,14 @@ gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup); out->vgt_esgs_ring_itemsize = esgs_itemsize; assert(max_prims_per_subgroup <= max_out_prims); + + gl_shader_stage es_stage = has_tess ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; + unsigned workgroup_size = + ac_compute_esgs_workgroup_size( + pipeline->device->physical_device->rad_info.chip_class, infos[es_stage].wave_size, + es_verts_per_subgroup, gs_inst_prims_in_subgroup); + infos[es_stage].workgroup_size = workgroup_size; + infos[MESA_SHADER_GEOMETRY].workgroup_size = workgroup_size; } static void @@ -2212,6 +2221,13 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pi } assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */ + + gl_shader_stage es_stage = nir[MESA_SHADER_TESS_CTRL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; + unsigned workgroup_size = + ac_compute_ngg_workgroup_size( + max_esverts, max_gsprims * gs_num_invocations, max_out_vertices, prim_amp_factor); + infos[MESA_SHADER_GEOMETRY].workgroup_size = workgroup_size; + infos[es_stage].workgroup_size = workgroup_size; } static void @@ -2937,6 +2953,19 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, radv_get_ballot_bit_size(pipeline->device, pStages[i], i, &keys[i]); } } + + /* PS always operates without workgroups. */ + if (nir[MESA_SHADER_FRAGMENT]) + infos[MESA_SHADER_FRAGMENT].workgroup_size = infos[MESA_SHADER_FRAGMENT].wave_size; + + if (nir[MESA_SHADER_COMPUTE]) { + /* Variable workgroup size is not supported by Vulkan. */ + assert(!nir[MESA_SHADER_COMPUTE]->info.workgroup_size_variable); + + infos[MESA_SHADER_COMPUTE].workgroup_size = + ac_compute_cs_workgroup_size( + nir[MESA_SHADER_COMPUTE]->info.workgroup_size, false, UINT32_MAX); + } } static void @@ -2988,9 +3017,12 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade { merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info); + unsigned tess_in_patch_size = pipeline_key->tess_input_vertices; + unsigned tess_out_patch_size = nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out; + /* Number of tessellation patches per workgroup processed by the current pipeline. */ unsigned num_patches = get_tcs_num_patches( - pipeline_key->tess_input_vertices, nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, + tess_in_patch_size, tess_out_patch_size, infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs, infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs, infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs, device->tess_offchip_block_dw_size, @@ -2998,8 +3030,7 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade /* LDS size used by VS+TCS for storing TCS inputs and outputs. */ unsigned tcs_lds_size = calculate_tess_lds_size( - device->physical_device->rad_info.chip_class, pipeline_key->tess_input_vertices, - nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, + device->physical_device->rad_info.chip_class, tess_in_patch_size, tess_out_patch_size, infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs, num_patches, infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs, infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs); @@ -3015,6 +3046,9 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade infos[MESA_SHADER_TESS_EVAL].num_tess_patches = num_patches; infos[MESA_SHADER_GEOMETRY].num_tess_patches = num_patches; + infos[MESA_SHADER_VERTEX].num_tess_patches = num_patches; + infos[MESA_SHADER_TESS_CTRL].tcs.tcs_vertices_out = tess_out_patch_size; + infos[MESA_SHADER_VERTEX].tcs.tcs_vertices_out = tess_out_patch_size; if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) { /* When the number of TCS input and output vertices are the same (typically 3): @@ -3028,8 +3062,7 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade */ infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq = device->physical_device->rad_info.chip_class >= GFX9 && - pipeline_key->tess_input_vertices == - nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out && + tess_in_patch_size == tess_out_patch_size && nir[MESA_SHADER_VERTEX]->info.float_controls_execution_mode == nir[MESA_SHADER_TESS_CTRL]->info.float_controls_execution_mode; @@ -3046,6 +3079,12 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade infos[MESA_SHADER_TESS_CTRL].vs.tcs_temp_only_input_mask = infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask; } + + for (gl_shader_stage s = MESA_SHADER_VERTEX; s <= MESA_SHADER_TESS_CTRL; ++s) + infos[s].workgroup_size = + ac_compute_lshs_workgroup_size( + device->physical_device->rad_info.chip_class, s, + num_patches, tess_in_patch_size, tess_out_patch_size); } static void @@ -3397,12 +3436,19 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device, struct gfx9_gs_info *gs_info = &infos[MESA_SHADER_GEOMETRY].gs_ring_info; gfx9_get_gs_info(pipeline_key, pipeline, nir, infos, gs_info); + } else { + gl_shader_stage hw_vs_api_stage = + nir[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; + infos[hw_vs_api_stage].workgroup_size = infos[hw_vs_api_stage].wave_size; } for (int i = 0; i < MESA_SHADER_STAGES; ++i) { if (nir[i]) { radv_start_feedback(stage_feedbacks[i]); + /* Wave and workgroup size should already be filled. */ + assert(infos[i].wave_size && infos[i].workgroup_size); + if (!radv_use_llvm_for_stage(device, i)) { nir_lower_non_uniform_access_options options = { .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | @@ -3517,6 +3563,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device, radv_nir_shader_info_pass(device, nir[MESA_SHADER_GEOMETRY], pipeline->layout, &key, &info); info.wave_size = 64; /* Wave32 not supported. */ + info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */ info.ballot_bit_size = 64; pipeline->gs_copy_shader = radv_create_gs_copy_shader( diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index cabf6845a87..10747f57fa5 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -267,6 +267,7 @@ struct radv_shader_info { bool has_ngg_early_prim_export; uint32_t num_lds_blocks_when_not_culling; uint32_t num_tess_patches; + unsigned workgroup_size; struct { uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX]; uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];