radv: Create mesh shading pipelines.

- Fill gfx10_ngg_info
- Allow NULL input assembly state
- Assert that the correct shader stages are used
- Program VGT_GS_MAX_VERT_OUT, GS_EN, GS_FAST_LAUNCH

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13580>
This commit is contained in:
Timur Kristóf 2021-12-22 21:25:23 +01:00 committed by Marge Bot
parent e2df56f502
commit 8dc4f626ac

View file

@ -1396,18 +1396,30 @@ radv_pipeline_is_blend_enabled(const VkGraphicsPipelineCreateInfo *pCreateInfo)
}
static uint64_t
radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
radv_pipeline_needed_dynamic_state(const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
bool has_color_att = radv_pipeline_has_color_attachments(pCreateInfo);
bool has_static_rasterizer_discard =
pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
!radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT);
uint64_t states = RADV_DYNAMIC_ALL;
/* Disable dynamic states that are useless to mesh shading. */
if (radv_pipeline_has_mesh(pipeline)) {
if (has_static_rasterizer_discard)
return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
states &= ~(RADV_DYNAMIC_VERTEX_INPUT | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE);
}
/* If rasterization is disabled we do not care about any of the
* dynamic states, since they are all rasterization related only,
* except primitive topology, primitive restart enable, vertex
* binding stride and rasterization discard itself.
*/
if (pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
!radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT)) {
if (has_static_rasterizer_discard) {
return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
RADV_DYNAMIC_VERTEX_INPUT;
@ -1572,7 +1584,7 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct radv_graphics_pipeline_create_info *extra)
{
uint64_t needed_states = radv_pipeline_needed_dynamic_state(pCreateInfo);
uint64_t needed_states = radv_pipeline_needed_dynamic_state(pipeline, pCreateInfo);
uint64_t states = needed_states;
pipeline->dynamic_state = default_dynamic_state;
@ -1640,9 +1652,14 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
}
if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
dynamic->primitive_topology = si_translate_prim(pCreateInfo->pInputAssemblyState->topology);
if (extra && extra->use_rectlist) {
dynamic->primitive_topology = V_008958_DI_PT_RECTLIST;
if (radv_pipeline_has_mesh(pipeline)) {
dynamic->primitive_topology = V_008958_DI_PT_POINTLIST;
} else {
dynamic->primitive_topology = si_translate_prim(pCreateInfo->pInputAssemblyState->topology);
if (extra && extra->use_rectlist) {
dynamic->primitive_topology = V_008958_DI_PT_RECTLIST;
}
}
}
@ -2053,6 +2070,57 @@ gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum chip_class chip_class, uin
S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
}
static void
gfx10_get_ngg_ms_info(nir_shader ** nir, struct radv_shader_info *infos, struct gfx10_ngg_info *ngg)
{
/* Special case for mesh shader workgroups.
*
* Mesh shaders don't have any real vertex input, but they can produce
* an arbitrary number of vertices and primitives (up to 256).
* We need to precisely control the number of mesh shader workgroups
* that are launched from draw calls.
*
* To achieve that, we set:
* - input primitive topology to point list
* - input vertex and primitive count to 1
* - max output vertex count and primitive amplification factor
* to the boundaries of the shader
*
* With that, in the draw call:
* - drawing 1 input vertex ~ launching 1 mesh shader workgroup
*
* In the shader:
* - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
* - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
*
* Notes:
* - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work
* - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs)
* - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index
*
*/
nir_shader *ms = nir[MESA_SHADER_MESH];
ngg->enable_vertex_grouping = true;
ngg->esgs_ring_size = 1;
ngg->hw_max_esverts = 1;
ngg->max_gsprims = 1;
ngg->max_out_verts = ms->info.mesh.max_vertices_out;
ngg->max_vert_out_per_gs_instance = false;
ngg->ngg_emit_size = 0;
ngg->prim_amp_factor = ms->info.mesh.max_primitives_out;
ngg->vgt_esgs_ring_itemsize = 1;
unsigned min_ngg_workgroup_size =
ac_compute_ngg_workgroup_size(ngg->hw_max_esverts, ngg->max_gsprims,
ngg->max_out_verts, ngg->prim_amp_factor);
unsigned api_workgroup_size =
ac_compute_cs_workgroup_size(ms->info.workgroup_size, false, UINT32_MAX);
infos[MESA_SHADER_MESH].workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
}
static void
gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline,
nir_shader **nir, struct radv_shader_info *infos, struct gfx10_ngg_info *ngg)
@ -2358,6 +2426,10 @@ get_vs_output_info(const struct radv_pipeline *pipeline)
static bool
radv_nir_stage_uses_xfb(const nir_shader *nir)
{
/* Mesh shaders don't support XFB. */
if (nir->info.stage == MESA_SHADER_MESH)
return false;
nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
bool uses_xfb = !!xfb;
@ -2422,6 +2494,9 @@ radv_link_shaders(struct radv_pipeline *pipeline,
if (shaders[MESA_SHADER_VERTEX]) {
ordered_shaders[shader_count++] = shaders[MESA_SHADER_VERTEX];
}
if (shaders[MESA_SHADER_MESH]) {
ordered_shaders[shader_count++] = shaders[MESA_SHADER_MESH];
}
if (shaders[MESA_SHADER_COMPUTE]) {
ordered_shaders[shader_count++] = shaders[MESA_SHADER_COMPUTE];
}
@ -2515,7 +2590,8 @@ radv_link_shaders(struct radv_pipeline *pipeline,
info->stage == pipeline->graphics.last_vgt_api_stage &&
((info->stage == MESA_SHADER_VERTEX && pipeline_key->vs.topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ||
(info->stage == MESA_SHADER_TESS_EVAL && info->tess.point_mode) ||
(info->stage == MESA_SHADER_GEOMETRY && info->gs.output_primitive == GL_POINTS));
(info->stage == MESA_SHADER_GEOMETRY && info->gs.output_primitive == GL_POINTS) ||
(info->stage == MESA_SHADER_MESH && info->mesh.primitive_type == GL_POINTS));
nir_variable *psiz_var =
nir_find_variable_with_location(ordered_shaders[i], nir_var_shader_out, VARYING_SLOT_PSIZ);
@ -2559,6 +2635,7 @@ radv_link_shaders(struct radv_pipeline *pipeline,
}
if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL ||
ordered_shaders[i]->info.stage == MESA_SHADER_MESH ||
(ordered_shaders[i]->info.stage == MESA_SHADER_VERTEX && has_geom_tess) ||
(ordered_shaders[i]->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) {
nir_lower_io_to_vector(ordered_shaders[i], nir_var_shader_out);
@ -2744,7 +2821,7 @@ radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline,
}
}
if (!key.vs.dynamic_input_state) {
if (!key.vs.dynamic_input_state && pCreateInfo->pVertexInputState) {
const VkPipelineVertexInputStateCreateInfo *input_state = pCreateInfo->pVertexInputState;
const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state = vk_find_struct_const(
input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
@ -2845,7 +2922,7 @@ radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline,
}
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
key.vs.topology = pCreateInfo->pInputAssemblyState->topology;
key.vs.topology = pCreateInfo->pInputAssemblyState ? pCreateInfo->pInputAssemblyState->topology : 0;
const VkPipelineRasterizationStateCreateInfo *raster_info = pCreateInfo->pRasterizationState;
const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_info =
@ -2903,7 +2980,9 @@ radv_determine_ngg_settings(struct radv_pipeline *pipeline,
{
struct radv_device *device = pipeline->device;
if (!nir[MESA_SHADER_GEOMETRY] && pipeline->graphics.last_vgt_api_stage != MESA_SHADER_NONE) {
/* Shader settings for VS or TES without GS. */
if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_VERTEX ||
pipeline->graphics.last_vgt_api_stage == MESA_SHADER_TESS_EVAL) {
uint64_t ps_inputs_read =
nir[MESA_SHADER_FRAGMENT] ? nir[MESA_SHADER_FRAGMENT]->info.inputs_read : 0;
gl_shader_stage es_stage = pipeline->graphics.last_vgt_api_stage;
@ -2972,8 +3051,10 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
if (pipeline_key->use_ngg) {
if (nir[MESA_SHADER_TESS_CTRL]) {
infos[MESA_SHADER_TESS_EVAL].is_ngg = true;
} else {
} else if (nir[MESA_SHADER_VERTEX]) {
infos[MESA_SHADER_VERTEX].is_ngg = true;
} else if (nir[MESA_SHADER_MESH]) {
infos[MESA_SHADER_MESH].is_ngg = true;
}
if (nir[MESA_SHADER_TESS_CTRL] && nir[MESA_SHADER_GEOMETRY] &&
@ -3571,11 +3652,24 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
modules[i]->sha1);
pipeline->active_stages |= mesa_to_vk_shader_stage(i);
if (i < MESA_SHADER_FRAGMENT)
if (i < MESA_SHADER_FRAGMENT || i == MESA_SHADER_MESH)
pipeline->graphics.last_vgt_api_stage = i;
}
}
ASSERTED bool primitive_shading =
modules[MESA_SHADER_VERTEX] || modules[MESA_SHADER_TESS_CTRL] ||
modules[MESA_SHADER_TESS_EVAL] || modules[MESA_SHADER_GEOMETRY];
ASSERTED bool mesh_shading =
modules[MESA_SHADER_MESH];
/* Primitive and mesh shading must not be mixed in the same pipeline. */
assert(!primitive_shading || !mesh_shading);
/* Mesh shaders are mandatory in mesh shading pipelines. */
assert(mesh_shading == !!modules[MESA_SHADER_MESH]);
/* Mesh shaders always need NGG. */
assert(!mesh_shading || pipeline_key->use_ngg);
if (custom_hash)
memcpy(hash, custom_hash, 20);
else {
@ -3647,7 +3741,8 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
radv_fill_shader_info(pipeline, pipeline_layout, pStages, pipeline_key, infos, nir);
bool pipeline_has_ngg = (nir[MESA_SHADER_VERTEX] && infos[MESA_SHADER_VERTEX].is_ngg) ||
(nir[MESA_SHADER_TESS_EVAL] && infos[MESA_SHADER_TESS_EVAL].is_ngg);
(nir[MESA_SHADER_TESS_EVAL] && infos[MESA_SHADER_TESS_EVAL].is_ngg) ||
(nir[MESA_SHADER_MESH] && infos[MESA_SHADER_MESH].is_ngg);
if (pipeline_has_ngg) {
struct gfx10_ngg_info *ngg_info;
@ -3656,10 +3751,17 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
ngg_info = &infos[MESA_SHADER_GEOMETRY].ngg_info;
else if (nir[MESA_SHADER_TESS_CTRL])
ngg_info = &infos[MESA_SHADER_TESS_EVAL].ngg_info;
else
else if (nir[MESA_SHADER_VERTEX])
ngg_info = &infos[MESA_SHADER_VERTEX].ngg_info;
else if (nir[MESA_SHADER_MESH])
ngg_info = &infos[MESA_SHADER_MESH].ngg_info;
else
unreachable("Missing NGG shader stage.");
gfx10_get_ngg_info(pipeline_key, pipeline, nir, infos, ngg_info);
if (pipeline->graphics.last_vgt_api_stage == MESA_SHADER_MESH)
gfx10_get_ngg_ms_info(nir, infos, ngg_info);
else
gfx10_get_ngg_info(pipeline_key, pipeline, nir, infos, ngg_info);
} else if (nir[MESA_SHADER_GEOMETRY]) {
struct gfx9_gs_info *gs_info = &infos[MESA_SHADER_GEOMETRY].gs_ring_info;
@ -3949,6 +4051,9 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_sta
} else {
return R_00B130_SPI_SHADER_USER_DATA_VS_0;
}
case MESA_SHADER_MESH:
assert(has_ngg);
return R_00B230_SPI_SHADER_USER_DATA_GS_0;
default:
unreachable("unknown shader");
}
@ -4713,10 +4818,9 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
{
uint64_t va = radv_shader_get_va(shader);
gl_shader_stage es_type =
radv_pipeline_has_mesh(pipeline) ? MESA_SHADER_MESH :
radv_pipeline_has_tess(pipeline) ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
struct radv_shader *es = es_type == MESA_SHADER_TESS_EVAL
? pipeline->shaders[MESA_SHADER_TESS_EVAL]
: pipeline->shaders[MESA_SHADER_VERTEX];
struct radv_shader *es = pipeline->shaders[es_type];
const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
@ -5113,6 +5217,18 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, struct rade
radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out);
}
static void
radv_pipeline_generate_mesh_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_pipeline *pipeline)
{
struct radv_shader *ms = pipeline->shaders[MESA_SHADER_MESH];
if (!ms)
return;
radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, ms);
radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, ms->info.workgroup_size);
}
static uint32_t
offset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool float16)
{
@ -5352,6 +5468,9 @@ radv_pipeline_generate_vgt_shader_config(struct radeon_cmdbuf *ctx_cs,
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
} else if (radv_pipeline_has_gs(pipeline)) {
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
} else if (radv_pipeline_has_mesh(pipeline)) {
assert(!radv_pipeline_has_ngg_passthrough(pipeline));
stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1);
} else if (radv_pipeline_has_ngg(pipeline)) {
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
}
@ -5383,6 +5502,8 @@ radv_pipeline_generate_vgt_shader_config(struct radeon_cmdbuf *ctx_cs,
vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.wave_size;
else if (pipeline->shaders[MESA_SHADER_VERTEX])
vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.wave_size;
else if (pipeline->shaders[MESA_SHADER_MESH])
vs_size = gs_size = pipeline->shaders[MESA_SHADER_MESH]->info.wave_size;
if (radv_pipeline_has_ngg(pipeline)) {
assert(!radv_pipeline_has_gs_copy_shader(pipeline));
@ -5483,6 +5604,9 @@ radv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs,
gs_out = si_conv_gl_prim_to_gs_out(
pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.primitive_mode);
}
} else if (radv_pipeline_has_mesh(pipeline)) {
gs_out =
si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_MESH]->info.ms.output_prim);
} else {
gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology);
}
@ -5596,6 +5720,7 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
radv_pipeline_generate_multisample_state(ctx_cs, pipeline);
radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline);
radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline);
radv_pipeline_generate_mesh_shader(ctx_cs, cs, pipeline);
if (radv_pipeline_has_tess(pipeline)) {
radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline);
@ -5715,16 +5840,21 @@ radv_pipeline_init_shader_stages_state(struct radv_pipeline *pipeline)
}
}
gl_shader_stage first_stage =
radv_pipeline_has_mesh(pipeline) ? MESA_SHADER_MESH : MESA_SHADER_VERTEX;
struct radv_userdata_info *loc =
radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
radv_lookup_user_sgpr(pipeline, first_stage, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
if (loc->sgpr_idx != -1) {
pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[first_stage];
pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
pipeline->graphics.vtx_emit_num = loc->num_sgprs;
pipeline->graphics.uses_drawid =
radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id;
radv_get_shader(pipeline, first_stage)->info.vs.needs_draw_id;
pipeline->graphics.uses_baseinstance =
radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_base_instance;
radv_get_shader(pipeline, first_stage)->info.vs.needs_base_instance;
assert(first_stage != MESA_SHADER_MESH || !pipeline->graphics.uses_baseinstance);
}
}
@ -5770,7 +5900,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra);
if (!radv_pipeline_has_mesh(pipeline))
radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra);
radv_pipeline_init_dynamic_state(pipeline, pCreateInfo, extra);
radv_pipeline_init_raster_state(pipeline, pCreateInfo);
radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo);
@ -5825,7 +5956,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
pCreateInfo->pTessellationState->patchControlPoints;
}
radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo, &key);
if (!radv_pipeline_has_mesh(pipeline))
radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo, &key);
radv_pipeline_init_binning_state(pipeline, pCreateInfo, &blend);
radv_pipeline_init_shader_stages_state(pipeline);
radv_pipeline_init_scratch(device, pipeline);