mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 02:10:11 +01:00
radv: emit indirect sets for indirect compute pipelines with DGC
This used to work only by luck: the current DGC prepare shader uses a single descriptor set, and it happened to be the currently bound compute shader's set. Using two descriptor sets, or starting from set 1, would simply fail. For indirect compute pipelines, descriptors must be emitted from the DGC shader because no compute pipeline is bound at all. This solution uses indirect descriptor sets because that approach is much shorter and easier to implement. It could be improved, but nothing uses indirect compute pipelines yet and this feature is still experimental. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29700>
This commit is contained in:
parent
b1ba02e707
commit
33a849e004
5 changed files with 59 additions and 6 deletions
|
|
@ -943,7 +943,7 @@ radv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs,
|
|||
radv_emit_shader_pointer(device, cs, base_reg + loc->sgpr_idx * 4, va, false);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
uint64_t
|
||||
radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx)
|
||||
{
|
||||
struct radv_descriptor_set *set = descriptors_state->sets[set_idx];
|
||||
|
|
@ -11599,6 +11599,9 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
|
|||
if (compute) {
|
||||
cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
if (!pGeneratedCommandsInfo->pipeline)
|
||||
radv_mark_descriptor_sets_dirty(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
|
||||
radv_dgc_after_dispatch(cmd_buffer);
|
||||
} else {
|
||||
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
|
||||
|
|
|
|||
|
|
@ -779,4 +779,6 @@ void radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64
|
|||
|
||||
void radv_end_conditional_rendering(struct radv_cmd_buffer *cmd_buffer);
|
||||
|
||||
uint64_t radv_descriptor_get_va(const struct radv_descriptor_state *descriptors_state, unsigned set_idx);
|
||||
|
||||
#endif /* RADV_CMD_BUFFER_H */
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
static void
|
||||
radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout,
|
||||
const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size)
|
||||
const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size, uint32_t *upload_size)
|
||||
{
|
||||
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
|
@ -59,6 +59,12 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
|
|||
/* PKT3_SET_SH_REG for pointer */
|
||||
*cmd_size += 4 * 4;
|
||||
}
|
||||
|
||||
/* PKT3_SET_SH_REG for indirect descriptor sets pointer */
|
||||
*cmd_size += 3 * 4;
|
||||
|
||||
/* Reserve space for indirect pipelines because they might use indirect descriptor sets. */
|
||||
*upload_size += MAX_SETS * 4;
|
||||
}
|
||||
|
||||
if (device->sqtt.bo) {
|
||||
|
|
@ -169,7 +175,7 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
|
|||
} else {
|
||||
assert(layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
struct radv_compute_pipeline *compute_pipeline = pipeline ? radv_pipeline_to_compute(pipeline) : NULL;
|
||||
radv_get_sequence_size_compute(layout, compute_pipeline, cmd_size);
|
||||
radv_get_sequence_size_compute(layout, compute_pipeline, cmd_size, upload_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -264,6 +270,9 @@ struct radv_dgc_params {
|
|||
|
||||
uint8_t bind_pipeline;
|
||||
uint16_t pipeline_params_offset;
|
||||
|
||||
/* For indirect descriptor sets */
|
||||
uint32_t indirect_desc_sets_va;
|
||||
};
|
||||
|
||||
enum {
|
||||
|
|
@ -1080,6 +1089,9 @@ dgc_emit_push_constant(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *pus
|
|||
|
||||
nir_def *param_buf = radv_meta_load_descriptor(b, 0, 0);
|
||||
nir_def *param_offset = nir_imul_imm(b, vbo_cnt, 24);
|
||||
param_offset = nir_iadd(
|
||||
b, param_offset,
|
||||
nir_bcsel(b, nir_ieq_imm(b, load_param8(b, bind_pipeline), 1), nir_imm_int(b, MAX_SETS * 4), nir_imm_int(b, 0)));
|
||||
nir_def *param_offset_offset = nir_iadd_imm(b, param_offset, MESA_VULKAN_SHADER_STAGES * 12);
|
||||
nir_def *param_const_offset =
|
||||
nir_iadd_imm(b, param_offset, MAX_PUSH_CONSTANTS_SIZE + MESA_VULKAN_SHADER_STAGES * 12);
|
||||
|
|
@ -1499,7 +1511,7 @@ dgc_emit_draw_mesh_tasks(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *d
|
|||
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NV.
|
||||
*/
|
||||
static void
|
||||
dgc_emit_bind_pipeline(struct dgc_cmdbuf *cs, nir_def *stream_addr)
|
||||
dgc_emit_bind_pipeline(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_variable *upload_offset)
|
||||
{
|
||||
const struct radv_device *device = cs->dev;
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
|
@ -1527,7 +1539,19 @@ dgc_emit_bind_pipeline(struct dgc_cmdbuf *cs, nir_def *stream_addr)
|
|||
dgc_cs_emit(load_metadata32(b, block_size_x));
|
||||
dgc_cs_emit(load_metadata32(b, block_size_y));
|
||||
dgc_cs_emit(load_metadata32(b, block_size_z));
|
||||
|
||||
nir_def *indirect_desc_sets_sgpr = load_metadata32(b, indirect_desc_sets_sgpr);
|
||||
nir_push_if(b, nir_ine_imm(b, indirect_desc_sets_sgpr, 0));
|
||||
{
|
||||
dgc_cs_emit_imm(PKT3(PKT3_SET_SH_REG, 1, 0));
|
||||
dgc_cs_emit(indirect_desc_sets_sgpr);
|
||||
dgc_cs_emit(load_param32(b, indirect_desc_sets_va));
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
dgc_cs_end();
|
||||
|
||||
nir_store_var(b, upload_offset, nir_iadd_imm(b, nir_load_var(b, upload_offset), MAX_SETS * 4), 0x1);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
|
|
@ -1637,7 +1661,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
|
|||
|
||||
nir_push_if(&b, nir_ieq_imm(&b, load_param8(&b, bind_pipeline), 1));
|
||||
{
|
||||
dgc_emit_bind_pipeline(&cmd_buf, stream_addr);
|
||||
dgc_emit_bind_pipeline(&cmd_buf, stream_addr, upload_offset);
|
||||
}
|
||||
nir_pop_if(&b, 0);
|
||||
|
||||
|
|
@ -2089,8 +2113,9 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
|
|||
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint32_t desc_size = pipeline ? 0 : MAX_SETS * 4;
|
||||
|
||||
*upload_size = MAX2(*upload_size, 16);
|
||||
*upload_size = MAX2(*upload_size + desc_size, 16);
|
||||
|
||||
if (!radv_cmd_buffer_upload_alloc(cmd_buffer, *upload_size, upload_offset, upload_data)) {
|
||||
vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
|
@ -2121,8 +2146,24 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
|
|||
params->grid_base_sgpr = (cs->info.user_data_0 + 4 * loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
|
||||
}
|
||||
} else {
|
||||
struct radv_descriptor_state *descriptors_state =
|
||||
radv_get_descriptors_state(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
|
||||
|
||||
params->bind_pipeline = 1;
|
||||
params->pipeline_params_offset = layout->pipeline_params_offset;
|
||||
|
||||
for (unsigned i = 0; i < MAX_SETS; i++) {
|
||||
uint32_t *uptr = ((uint32_t *)*upload_data) + i;
|
||||
uint64_t set_va = 0;
|
||||
if (descriptors_state->valid & (1u << i))
|
||||
set_va = radv_descriptor_get_va(descriptors_state, i);
|
||||
|
||||
uptr[0] = set_va & 0xffffffff;
|
||||
}
|
||||
|
||||
params->indirect_desc_sets_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + *upload_offset;
|
||||
|
||||
*upload_data = (char *)*upload_data + desc_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -91,6 +91,12 @@ radv_get_compute_shader_metadata(const struct radv_device *device, const struct
|
|||
|
||||
metadata->push_const_sgpr = upload_sgpr | (inline_sgpr << 16);
|
||||
metadata->inline_push_const_mask = cs->info.inline_push_constant_mask;
|
||||
|
||||
const struct radv_userdata_info *indirect_desc_sets_loc = radv_get_user_sgpr(cs, AC_UD_INDIRECT_DESCRIPTOR_SETS);
|
||||
if (indirect_desc_sets_loc->sgpr_idx != -1) {
|
||||
metadata->indirect_desc_sets_sgpr =
|
||||
(cs->info.user_data_0 + 4 * indirect_desc_sets_loc->sgpr_idx - SI_SH_REG_OFFSET) >> 2;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ struct radv_compute_pipeline_metadata {
|
|||
uint32_t grid_base_sgpr;
|
||||
uint32_t push_const_sgpr;
|
||||
uint64_t inline_push_const_mask;
|
||||
uint32_t indirect_desc_sets_sgpr;
|
||||
};
|
||||
|
||||
uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader_info *info);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue