mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
radv: Calculate workgroup sizes in radv_pipeline.
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12321>
This commit is contained in:
parent
395c0c52c7
commit
9fd36bbacd
2 changed files with 56 additions and 8 deletions
|
|
@ -1842,11 +1842,12 @@ gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline
|
||||||
{
|
{
|
||||||
struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
|
struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
|
||||||
struct radv_es_output_info *es_info;
|
struct radv_es_output_info *es_info;
|
||||||
|
bool has_tess = !!nir[MESA_SHADER_TESS_CTRL];
|
||||||
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
|
if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
|
||||||
es_info = nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
|
es_info = has_tess ? &gs_info->tes.es_info : &gs_info->vs.es_info;
|
||||||
else
|
else
|
||||||
es_info = nir[MESA_SHADER_TESS_CTRL] ? &infos[MESA_SHADER_TESS_EVAL].tes.es_info
|
es_info = has_tess ? &infos[MESA_SHADER_TESS_EVAL].tes.es_info
|
||||||
: &infos[MESA_SHADER_VERTEX].vs.es_info;
|
: &infos[MESA_SHADER_VERTEX].vs.es_info;
|
||||||
|
|
||||||
unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
|
unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
|
||||||
bool uses_adjacency;
|
bool uses_adjacency;
|
||||||
|
|
@ -1949,6 +1950,14 @@ gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline
|
||||||
out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
|
out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
|
||||||
out->vgt_esgs_ring_itemsize = esgs_itemsize;
|
out->vgt_esgs_ring_itemsize = esgs_itemsize;
|
||||||
assert(max_prims_per_subgroup <= max_out_prims);
|
assert(max_prims_per_subgroup <= max_out_prims);
|
||||||
|
|
||||||
|
gl_shader_stage es_stage = has_tess ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
|
||||||
|
unsigned workgroup_size =
|
||||||
|
ac_compute_esgs_workgroup_size(
|
||||||
|
pipeline->device->physical_device->rad_info.chip_class, infos[es_stage].wave_size,
|
||||||
|
es_verts_per_subgroup, gs_inst_prims_in_subgroup);
|
||||||
|
infos[es_stage].workgroup_size = workgroup_size;
|
||||||
|
infos[MESA_SHADER_GEOMETRY].workgroup_size = workgroup_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -2212,6 +2221,13 @@ gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pi
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */
|
assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */
|
||||||
|
|
||||||
|
gl_shader_stage es_stage = nir[MESA_SHADER_TESS_CTRL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
|
||||||
|
unsigned workgroup_size =
|
||||||
|
ac_compute_ngg_workgroup_size(
|
||||||
|
max_esverts, max_gsprims * gs_num_invocations, max_out_vertices, prim_amp_factor);
|
||||||
|
infos[MESA_SHADER_GEOMETRY].workgroup_size = workgroup_size;
|
||||||
|
infos[es_stage].workgroup_size = workgroup_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -2937,6 +2953,19 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
|
||||||
radv_get_ballot_bit_size(pipeline->device, pStages[i], i, &keys[i]);
|
radv_get_ballot_bit_size(pipeline->device, pStages[i], i, &keys[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* PS always operates without workgroups. */
|
||||||
|
if (nir[MESA_SHADER_FRAGMENT])
|
||||||
|
infos[MESA_SHADER_FRAGMENT].workgroup_size = infos[MESA_SHADER_FRAGMENT].wave_size;
|
||||||
|
|
||||||
|
if (nir[MESA_SHADER_COMPUTE]) {
|
||||||
|
/* Variable workgroup size is not supported by Vulkan. */
|
||||||
|
assert(!nir[MESA_SHADER_COMPUTE]->info.workgroup_size_variable);
|
||||||
|
|
||||||
|
infos[MESA_SHADER_COMPUTE].workgroup_size =
|
||||||
|
ac_compute_cs_workgroup_size(
|
||||||
|
nir[MESA_SHADER_COMPUTE]->info.workgroup_size, false, UINT32_MAX);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -2988,9 +3017,12 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade
|
||||||
{
|
{
|
||||||
merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
|
merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
|
||||||
|
|
||||||
|
unsigned tess_in_patch_size = pipeline_key->tess_input_vertices;
|
||||||
|
unsigned tess_out_patch_size = nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out;
|
||||||
|
|
||||||
/* Number of tessellation patches per workgroup processed by the current pipeline. */
|
/* Number of tessellation patches per workgroup processed by the current pipeline. */
|
||||||
unsigned num_patches = get_tcs_num_patches(
|
unsigned num_patches = get_tcs_num_patches(
|
||||||
pipeline_key->tess_input_vertices, nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
|
tess_in_patch_size, tess_out_patch_size,
|
||||||
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs,
|
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs,
|
||||||
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
|
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
|
||||||
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs, device->tess_offchip_block_dw_size,
|
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs, device->tess_offchip_block_dw_size,
|
||||||
|
|
@ -2998,8 +3030,7 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade
|
||||||
|
|
||||||
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */
|
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */
|
||||||
unsigned tcs_lds_size = calculate_tess_lds_size(
|
unsigned tcs_lds_size = calculate_tess_lds_size(
|
||||||
device->physical_device->rad_info.chip_class, pipeline_key->tess_input_vertices,
|
device->physical_device->rad_info.chip_class, tess_in_patch_size, tess_out_patch_size,
|
||||||
nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
|
|
||||||
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs, num_patches,
|
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs, num_patches,
|
||||||
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
|
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
|
||||||
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs);
|
infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs);
|
||||||
|
|
@ -3015,6 +3046,9 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade
|
||||||
|
|
||||||
infos[MESA_SHADER_TESS_EVAL].num_tess_patches = num_patches;
|
infos[MESA_SHADER_TESS_EVAL].num_tess_patches = num_patches;
|
||||||
infos[MESA_SHADER_GEOMETRY].num_tess_patches = num_patches;
|
infos[MESA_SHADER_GEOMETRY].num_tess_patches = num_patches;
|
||||||
|
infos[MESA_SHADER_VERTEX].num_tess_patches = num_patches;
|
||||||
|
infos[MESA_SHADER_TESS_CTRL].tcs.tcs_vertices_out = tess_out_patch_size;
|
||||||
|
infos[MESA_SHADER_VERTEX].tcs.tcs_vertices_out = tess_out_patch_size;
|
||||||
|
|
||||||
if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
|
if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
|
||||||
/* When the number of TCS input and output vertices are the same (typically 3):
|
/* When the number of TCS input and output vertices are the same (typically 3):
|
||||||
|
|
@ -3028,8 +3062,7 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade
|
||||||
*/
|
*/
|
||||||
infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq =
|
infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq =
|
||||||
device->physical_device->rad_info.chip_class >= GFX9 &&
|
device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||||
pipeline_key->tess_input_vertices ==
|
tess_in_patch_size == tess_out_patch_size &&
|
||||||
nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out &&
|
|
||||||
nir[MESA_SHADER_VERTEX]->info.float_controls_execution_mode ==
|
nir[MESA_SHADER_VERTEX]->info.float_controls_execution_mode ==
|
||||||
nir[MESA_SHADER_TESS_CTRL]->info.float_controls_execution_mode;
|
nir[MESA_SHADER_TESS_CTRL]->info.float_controls_execution_mode;
|
||||||
|
|
||||||
|
|
@ -3046,6 +3079,12 @@ gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shade
|
||||||
infos[MESA_SHADER_TESS_CTRL].vs.tcs_temp_only_input_mask =
|
infos[MESA_SHADER_TESS_CTRL].vs.tcs_temp_only_input_mask =
|
||||||
infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask;
|
infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (gl_shader_stage s = MESA_SHADER_VERTEX; s <= MESA_SHADER_TESS_CTRL; ++s)
|
||||||
|
infos[s].workgroup_size =
|
||||||
|
ac_compute_lshs_workgroup_size(
|
||||||
|
device->physical_device->rad_info.chip_class, s,
|
||||||
|
num_patches, tess_in_patch_size, tess_out_patch_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -3397,12 +3436,19 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
|
||||||
struct gfx9_gs_info *gs_info = &infos[MESA_SHADER_GEOMETRY].gs_ring_info;
|
struct gfx9_gs_info *gs_info = &infos[MESA_SHADER_GEOMETRY].gs_ring_info;
|
||||||
|
|
||||||
gfx9_get_gs_info(pipeline_key, pipeline, nir, infos, gs_info);
|
gfx9_get_gs_info(pipeline_key, pipeline, nir, infos, gs_info);
|
||||||
|
} else {
|
||||||
|
gl_shader_stage hw_vs_api_stage =
|
||||||
|
nir[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
|
||||||
|
infos[hw_vs_api_stage].workgroup_size = infos[hw_vs_api_stage].wave_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||||
if (nir[i]) {
|
if (nir[i]) {
|
||||||
radv_start_feedback(stage_feedbacks[i]);
|
radv_start_feedback(stage_feedbacks[i]);
|
||||||
|
|
||||||
|
/* Wave and workgroup size should already be filled. */
|
||||||
|
assert(infos[i].wave_size && infos[i].workgroup_size);
|
||||||
|
|
||||||
if (!radv_use_llvm_for_stage(device, i)) {
|
if (!radv_use_llvm_for_stage(device, i)) {
|
||||||
nir_lower_non_uniform_access_options options = {
|
nir_lower_non_uniform_access_options options = {
|
||||||
.types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access |
|
.types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access |
|
||||||
|
|
@ -3517,6 +3563,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
|
||||||
radv_nir_shader_info_pass(device, nir[MESA_SHADER_GEOMETRY], pipeline->layout, &key,
|
radv_nir_shader_info_pass(device, nir[MESA_SHADER_GEOMETRY], pipeline->layout, &key,
|
||||||
&info);
|
&info);
|
||||||
info.wave_size = 64; /* Wave32 not supported. */
|
info.wave_size = 64; /* Wave32 not supported. */
|
||||||
|
info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
|
||||||
info.ballot_bit_size = 64;
|
info.ballot_bit_size = 64;
|
||||||
|
|
||||||
pipeline->gs_copy_shader = radv_create_gs_copy_shader(
|
pipeline->gs_copy_shader = radv_create_gs_copy_shader(
|
||||||
|
|
|
||||||
|
|
@ -267,6 +267,7 @@ struct radv_shader_info {
|
||||||
bool has_ngg_early_prim_export;
|
bool has_ngg_early_prim_export;
|
||||||
uint32_t num_lds_blocks_when_not_culling;
|
uint32_t num_lds_blocks_when_not_culling;
|
||||||
uint32_t num_tess_patches;
|
uint32_t num_tess_patches;
|
||||||
|
unsigned workgroup_size;
|
||||||
struct {
|
struct {
|
||||||
uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
|
uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
|
||||||
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
|
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue