mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 05:58:05 +02:00
radv: add support for task shader invocations queries on GFX11
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26304>
This commit is contained in:
parent
b24e07e9ed
commit
35e0c88f49
1 changed files with 53 additions and 30 deletions
|
|
@ -302,23 +302,25 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
|
|||
nir_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset);
|
||||
nir_store_var(&b, available, nir_i2b(&b, available32), 0x1);
|
||||
|
||||
nir_push_if(&b, nir_test_mask(&b, stats_mask, VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT));
|
||||
{
|
||||
const uint32_t idx = ffs(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT) - 1;
|
||||
if (device->physical_device->emulate_mesh_shader_queries) {
|
||||
nir_push_if(&b, nir_test_mask(&b, stats_mask, VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT));
|
||||
{
|
||||
const uint32_t idx = ffs(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT) - 1;
|
||||
|
||||
nir_def *avail_start_offset = nir_iadd_imm(&b, input_base, pipeline_statistics_indices[idx] * 8 + 4);
|
||||
nir_def *avail_start = nir_load_ssbo(&b, 1, 32, src_buf, avail_start_offset);
|
||||
nir_def *avail_start_offset = nir_iadd_imm(&b, input_base, pipeline_statistics_indices[idx] * 8 + 4);
|
||||
nir_def *avail_start = nir_load_ssbo(&b, 1, 32, src_buf, avail_start_offset);
|
||||
|
||||
nir_def *avail_end_offset =
|
||||
nir_iadd_imm(&b, input_base, pipeline_statistics_indices[idx] * 8 + pipelinestat_block_size + 4);
|
||||
nir_def *avail_end = nir_load_ssbo(&b, 1, 32, src_buf, avail_end_offset);
|
||||
nir_def *avail_end_offset =
|
||||
nir_iadd_imm(&b, input_base, pipeline_statistics_indices[idx] * 8 + pipelinestat_block_size + 4);
|
||||
nir_def *avail_end = nir_load_ssbo(&b, 1, 32, src_buf, avail_end_offset);
|
||||
|
||||
nir_def *task_invoc_result_available =
|
||||
nir_i2b(&b, nir_iand_imm(&b, nir_iand(&b, avail_start, avail_end), 0x80000000));
|
||||
nir_def *task_invoc_result_available =
|
||||
nir_i2b(&b, nir_iand_imm(&b, nir_iand(&b, avail_start, avail_end), 0x80000000));
|
||||
|
||||
nir_store_var(&b, available, nir_iand(&b, nir_load_var(&b, available), task_invoc_result_available), 0x1);
|
||||
nir_store_var(&b, available, nir_iand(&b, nir_load_var(&b, available), task_invoc_result_available), 0x1);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
|
||||
nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
|
||||
nir_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
|
||||
|
|
@ -1243,8 +1245,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
|
|||
pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT));
|
||||
|
||||
/* The number of task shader invocations needs to be queried on ACE. */
|
||||
pool->uses_ace = device->physical_device->emulate_mesh_shader_queries &&
|
||||
(pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
|
||||
pool->uses_ace = (pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
|
||||
|
||||
switch (pCreateInfo->queryType) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
|
|
@ -1462,7 +1463,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
|
|||
do {
|
||||
available = p_atomic_read(avail_ptr);
|
||||
|
||||
if (pool->uses_ace) {
|
||||
if (pool->uses_ace && device->physical_device->emulate_mesh_shader_queries) {
|
||||
const uint32_t task_invoc_offset =
|
||||
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
|
||||
const uint32_t *avail_ptr_start = (const uint32_t *)(src + task_invoc_offset + 4);
|
||||
|
|
@ -1782,7 +1783,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||
/* This waits on the ME. All copies below are done on the ME */
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
|
||||
|
||||
if (pool->uses_ace) {
|
||||
if (pool->uses_ace && cmd_buffer->device->physical_device->emulate_mesh_shader_queries) {
|
||||
const uint64_t src_va = va + query * pool->stride;
|
||||
const uint64_t start_va = src_va + task_invoc_offset + 4;
|
||||
const uint64_t stop_va = start_va + pipelinestat_block_size;
|
||||
|
|
@ -2151,18 +2152,29 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
|
|||
uint32_t task_invoc_offset =
|
||||
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
|
||||
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 11);
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
va += task_invoc_offset;
|
||||
|
||||
gfx10_copy_gds_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
|
||||
radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4);
|
||||
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
radeon_emit(cmd_buffer->gang.cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cmd_buffer->gang.cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
|
||||
radeon_emit(cmd_buffer->gang.cs, va);
|
||||
radeon_emit(cmd_buffer->gang.cs, va >> 32);
|
||||
} else {
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 11);
|
||||
|
||||
if (!cmd_buffer->state.active_pipeline_ace_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
gfx10_copy_gds_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
|
||||
radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_pipeline_ace_queries++;
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
|
||||
if (!cmd_buffer->state.active_pipeline_ace_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
||||
cmd_buffer->state.active_pipeline_ace_queries++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -2360,15 +2372,26 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
|
|||
uint32_t task_invoc_offset =
|
||||
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
|
||||
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 11);
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
va += task_invoc_offset;
|
||||
|
||||
gfx10_copy_gds_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
|
||||
radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4);
|
||||
|
||||
cmd_buffer->state.active_pipeline_ace_queries--;
|
||||
radeon_emit(cmd_buffer->gang.cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cmd_buffer->gang.cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
|
||||
radeon_emit(cmd_buffer->gang.cs, va);
|
||||
radeon_emit(cmd_buffer->gang.cs, va >> 32);
|
||||
} else {
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 11);
|
||||
|
||||
if (!cmd_buffer->state.active_pipeline_ace_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
gfx10_copy_gds_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
|
||||
radv_cs_write_data_imm(cmd_buffer->gang.cs, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_pipeline_ace_queries--;
|
||||
|
||||
if (!cmd_buffer->state.active_pipeline_ace_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
}
|
||||
|
||||
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue