radv: emit indirect sets for indirect compute pipelines with DGC

This used to work by luck: the DGC prepare shader uses only one descriptor
set, and it happened to match the set bound for the current compute
shader. Using two descriptor sets, or starting from set index 1, would
simply fail.

For indirect compute pipelines, descriptors must be emitted from the
DGC shader because there is no bound compute pipeline at all. This
solution uses indirect descriptor sets because it is much shorter and
easier to implement. It could be improved, but nothing uses indirect
compute pipelines yet and the feature is still experimental.
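For reference, "indirect descriptor sets" means the shader receives a single
user SGPR pointing at a small table of per-set addresses, instead of one SGPR
per set. A minimal sketch of the idea, with illustrative names (not RADV's
actual definitions), assuming RADV's MAX_SETS of 32:

    #include <stdint.h>

    #define MAX_SETS 32 /* RADV's descriptor set limit */

    /* One 32-bit VA per descriptor set; the high bits are known to the
     * compiler, so only the low dword is stored. Unbound sets are 0. */
    struct indirect_desc_table {
       uint32_t set_va[MAX_SETS];
    };

    /* Host-side sketch: snapshot the currently bound sets into the table. */
    static void
    fill_indirect_desc_table(struct indirect_desc_table *table, const uint64_t *set_vas, uint32_t valid_mask)
    {
       for (unsigned i = 0; i < MAX_SETS; i++)
          table->set_va[i] = (valid_mask & (1u << i)) ? (uint32_t)set_vas[i] : 0;
    }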

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29700>
commit 33a849e004 (parent b1ba02e707)
Authored by Samuel Pitoiset on 2024-06-12 17:41:15 +02:00, committed by Marge Bot
5 changed files with 59 additions and 6 deletions


@@ -943,7 +943,7 @@ radv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs,
    radv_emit_shader_pointer(device, cs, base_reg + loc->sgpr_idx * 4, va, false);
 }
 
-static uint64_t
+uint64_t
 radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx)
 {
    struct radv_descriptor_set *set = descriptors_state->sets[set_idx];
@@ -11599,6 +11599,9 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
    if (compute) {
       cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
 
+      if (!pGeneratedCommandsInfo->pipeline)
+         radv_mark_descriptor_sets_dirty(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
+
       radv_dgc_after_dispatch(cmd_buffer);
    } else {
       struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);


@@ -779,4 +779,6 @@ void radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64
 void radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer);
 
+uint64_t radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx);
+
 #endif /* RADV_CMD_BUFFER_H */


@@ -17,7 +17,7 @@
 static void
 radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout,
-                               const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size)
+                               const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size, uint32_t *upload_size)
 {
    const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
    const struct radv_physical_device *pdev = radv_device_physical(device);
@@ -59,6 +59,12 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
          /* PKT3_SET_SH_REG for pointer */
          *cmd_size += 4 * 4;
       }
+
+      /* PKT3_SET_SH_REG for indirect descriptor sets pointer */
+      *cmd_size += 3 * 4;
+
+      /* Reserve space for indirect pipelines because they might use indirect descriptor sets. */
+      *upload_size += MAX_SETS * 4;
    }
 
    if (device->sqtt.bo) {
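The sizes reserved above follow from the PM4 encoding: a SET_SH_REG packet
writing one register is three dwords (header, register offset, one value),
and the descriptor-set table needs one dword per set. A hedged sanity check
of that arithmetic, assuming MAX_SETS is 32 as in RADV:

    #define MAX_SETS 32

    enum {
       /* PKT3 header + register offset + 32-bit table pointer */
       SET_SH_REG_ONE_DWORD_BYTES = 3 * 4, /* 12 bytes of cmd space     */
       /* One 32-bit VA per descriptor set */
       DESC_TABLE_BYTES = MAX_SETS * 4,    /* 128 bytes of upload space */
    };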
@@ -169,7 +175,7 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
    } else {
       assert(layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
       struct radv_compute_pipeline *compute_pipeline = pipeline ? radv_pipeline_to_compute(pipeline) : NULL;
-      radv_get_sequence_size_compute(layout, compute_pipeline, cmd_size);
+      radv_get_sequence_size_compute(layout, compute_pipeline, cmd_size, upload_size);
    }
 }
@@ -264,6 +270,9 @@ struct radv_dgc_params {
    uint8_t bind_pipeline;
    uint16_t pipeline_params_offset;
+
+   /* For indirect descriptor sets */
+   uint32_t indirect_desc_sets_va;
 };
 
 enum {
@@ -1080,6 +1089,9 @@ dgc_emit_push_constant(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *pus
    nir_def *param_buf = radv_meta_load_descriptor(b, 0, 0);
    nir_def *param_offset = nir_imul_imm(b, vbo_cnt, 24);
+   param_offset = nir_iadd(
+      b, param_offset,
+      nir_bcsel(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1), nir_imm_int(b, MAX_SETS * 4), nir_imm_int(b, 0)));
    nir_def *param_offset_offset = nir_iadd_imm(b, param_offset, MESA_VULKAN_SHADER_STAGES * 12);
    nir_def *param_const_offset =
       nir_iadd_imm(b, param_offset, MAX_PUSH_CONSTANTS_SIZE + MESA_VULKAN_SHADER_STAGES * 12);
@@ -1499,7 +1511,7 @@ dgc_emit_draw_mesh_tasks(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *d
  * Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NV.
  */
 static void
-dgc_emit_bind_pipeline(struct dgc_cmdbuf *cs, nir_def *stream_addr)
+dgc_emit_bind_pipeline(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_variable *upload_offset)
 {
    const struct radv_device *device = cs->dev;
    const struct radv_physical_device *pdev = radv_device_physical(device);
@@ -1527,7 +1539,19 @@ dgc_emit_bind_pipeline(struct dgc_cmdbuf *cs, nir_def *stream_addr)
    dgc_cs_emit(load_metadata32(b, block_size_x));
    dgc_cs_emit(load_metadata32(b, block_size_y));
    dgc_cs_emit(load_metadata32(b, block_size_z));
+
+   nir_def *indirect_desc_sets_sgpr = load_metadata32(b, indirect_desc_sets_sgpr);
+   nir_push_if(b, nir_ine_imm(b, indirect_desc_sets_sgpr, 0));
+   {
+      dgc_cs_emit_imm(PKT3(PKT3_SET_SH_REG, 1, 0));
+      dgc_cs_emit(indirect_desc_sets_sgpr);
+      dgc_cs_emit(load_param32(b, indirect_desc_sets_va));
+   }
+   nir_pop_if(b, NULL);
+
    dgc_cs_end();
+
+   nir_store_var(b, upload_offset, nir_iadd_imm(b, nir_load_var(b, upload_offset), MAX_SETS * 4), 0x1);
 }
 
 static nir_def *
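The three dgc_cs_emit() calls above build a standard three-dword SET_SH_REG
packet. Roughly what the equivalent host-side emission would look like (a
sketch; pkt3() and the cs/cdw parameters are illustrative, only the
PKT3_SET_SH_REG opcode and the PM4 header encoding are from AMD's packet
format):

    #include <stdint.h>

    #define PKT3_SET_SH_REG 0x76 /* PM4 type-3 opcode */

    /* PKT3 header: type 3, opcode, count = body dwords minus one. */
    static inline uint32_t
    pkt3(unsigned op, unsigned count)
    {
       return (3u << 30) | (count << 16) | (op << 8);
    }

    /* Write the descriptor-set table VA into the indirect-sets user SGPR;
     * a register offset of 0 means the shader has no such SGPR. */
    static void
    emit_indirect_sets_pointer(uint32_t *cs, unsigned *cdw, uint32_t sgpr_reg_offset, uint32_t table_va_lo)
    {
       if (!sgpr_reg_offset)
          return;
       cs[(*cdw)++] = pkt3(PKT3_SET_SH_REG, 1); /* header                    */
       cs[(*cdw)++] = sgpr_reg_offset;          /* (reg - SH reg base) >> 2  */
       cs[(*cdw)++] = table_va_lo;              /* low 32 bits of table VA   */
    }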
@@ -1637,7 +1661,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
       nir_push_if(&b, nir_ieq_imm(&b, load_param8(&b, bind_pipeline), 1));
       {
-         dgc_emit_bind_pipeline(&cmd_buf, stream_addr);
+         dgc_emit_bind_pipeline(&cmd_buf, stream_addr, upload_offset);
       }
       nir_pop_if(&b, 0);
@@ -2089,8 +2113,9 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
    VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   const uint32_t desc_size = pipeline ? 0 : MAX_SETS * 4;
 
-   *upload_size = MAX2(*upload_size, 16);
+   *upload_size = MAX2(*upload_size + desc_size, 16);
 
    if (!radv_cmd_buffer_upload_alloc(cmd_buffer, *upload_size, upload_offset, upload_data)) {
       vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -2121,8 +2146,24 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
          params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
       }
    } else {
+      struct radv_descriptor_state *descriptors_state =
+         radv_get_descriptors_state(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
+
       params->bind_pipeline = 1;
       params->pipeline_params_offset = layout->pipeline_params_offset;
+
+      for (unsigned i = 0; i < MAX_SETS; i++) {
+         uint32_t *uptr = ((uint32_t *)*upload_data) + i;
+         uint64_t set_va = 0;
+         if (descriptors_state->valid & (1u << i))
+            set_va = radv_descriptor_get_va(descriptors_state, i);
+
+         uptr[0] = set_va & 0xffffffff;
+      }
+
+      params->indirect_desc_sets_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + *upload_offset;
+
+      *upload_data = (char *)*upload_data + desc_size;
    }
 }
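With this change, the upload allocation for an indirect compute pipeline
starts with the descriptor-set table, and everything that used to live at
offset 0 is shifted by its size; this is why dgc_emit_push_constant() above
biases param_offset by MAX_SETS * 4 when bind_pipeline is set. A sketch of
the resulting layout (offsets assume MAX_SETS == 32; for compute, vbo_cnt
is 0):

    /* DGC upload buffer when bind_pipeline == 1 (illustrative):
     *
     *   [0   .. 127]  uint32_t set_va[MAX_SETS]  -- written by the loop above
     *   [128 .. ...]  MESA_VULKAN_SHADER_STAGES * 12 bytes of push-constant
     *                 layout data, then MAX_PUSH_CONSTANTS_SIZE bytes of
     *                 push-constant values (the pre-existing layout).
     */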


@@ -91,6 +91,12 @@ radv_get_compute_shader_metadata(const struct radv_device *device, const struct
    metadata->push_const_sgpr = upload_sgpr | (inline_sgpr << 16);
    metadata->inline_push_const_mask = cs->info.inline_push_constant_mask;
+
+   const struct radv_userdata_info *indirect_desc_sets_loc = radv_get_user_sgpr(cs, AC_UD_INDIRECT_DESCRIPTOR_SETS);
+   if (indirect_desc_sets_loc->sgpr_idx != -1) {
+      metadata->indirect_desc_sets_sgpr =
+         (cs->info.user_data_0 + 4 * indirect_desc_sets_loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
+   }
 }
 
 void
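The shift by 2 converts a byte-addressed SH register into the dword-granular
offset that SET_SH_REG expects. A worked example, assuming GFX9+ values from
AMD's register headers (COMPUTE_USER_DATA_0 at 0xB900, SI_SH_REG_OFFSET at
0xB000) and an indirect-sets pointer in user SGPR index 2:

    /* user_data_0  = 0xB900                  (R_00B900_COMPUTE_USER_DATA_0)
     * byte address = 0xB900 + 4 * 2         = 0xB908
     * reg offset   = (0xB908 - 0xB000) >> 2 = 0x242
     *
     * 0x242 is what the DGC prepare shader later emits as the second dword
     * of the SET_SH_REG packet. sgpr_idx == -1 leaves the field 0, which
     * the shader treats as "no indirect descriptor sets".
     */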


@@ -41,6 +41,7 @@ struct radv_compute_pipeline_metadata {
    uint32_t grid_base_sgpr;
    uint32_t push_const_sgpr;
    uint64_t inline_push_const_mask;
+   uint32_t indirect_desc_sets_sgpr;
 };
 
 uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader_info *info);