mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
radv: add support for mesh primitives queries on GFX11
This is natively supported with new pipeline statistics. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26304>
This commit is contained in:
parent
4a6c18f503
commit
b24e07e9ed
1 changed files with 121 additions and 38 deletions
|
|
@@ -1283,7 +1283,13 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
|
|||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
|
||||
pool->stride = 16;
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
/* GFX11 natively supports mesh generated primitives with pipeline statistics. */
|
||||
pool->stride = radv_get_pipelinestat_query_size(device) * 2;
|
||||
} else {
|
||||
assert(device->physical_device->emulate_mesh_shader_queries);
|
||||
pool->stride = 16;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
unreachable("creating unhandled query type");
|
||||
|
|
@@ -1291,7 +1297,9 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
|
|||
|
||||
pool->availability_offset = pool->stride * pCreateInfo->queryCount;
|
||||
pool->size = pool->availability_offset;
|
||||
if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS)
|
||||
if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
|
||||
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT &&
|
||||
device->physical_device->rad_info.gfx_level >= GFX11))
|
||||
pool->size += 4 * pCreateInfo->queryCount;
|
||||
|
||||
result = device->ws->buffer_create(device->ws, pool->size, 64, RADEON_DOMAIN_GTT,
|
||||
|
|
@@ -1607,24 +1615,41 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
|
|||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
|
||||
p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src;
|
||||
uint64_t ms_prim_gen;
|
||||
|
||||
do {
|
||||
available = 1;
|
||||
if (!(p_atomic_read(&src64[0].value) & 0x8000000000000000UL) ||
|
||||
!(p_atomic_read(&src64[1].value) & 0x8000000000000000UL)) {
|
||||
available = 0;
|
||||
}
|
||||
} while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT) && !(timed_out = (atimeout < os_time_get_nano())));
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
|
||||
const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);
|
||||
|
||||
do {
|
||||
available = p_atomic_read(avail_ptr);
|
||||
} while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT) &&
|
||||
!(timed_out = (atimeout < os_time_get_nano())));
|
||||
|
||||
const uint64_t *start = (uint64_t *)src;
|
||||
const uint64_t *stop = (uint64_t *)(src + pipelinestat_block_size);
|
||||
|
||||
ms_prim_gen = stop[pipeline_statistics_indices[13]] - start[pipeline_statistics_indices[13]];
|
||||
} else {
|
||||
p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src;
|
||||
|
||||
do {
|
||||
available = 1;
|
||||
if (!(p_atomic_read(&src64[0].value) & 0x8000000000000000UL) ||
|
||||
!(p_atomic_read(&src64[1].value) & 0x8000000000000000UL)) {
|
||||
available = 0;
|
||||
}
|
||||
} while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT) &&
|
||||
!(timed_out = (atimeout < os_time_get_nano())));
|
||||
|
||||
ms_prim_gen = p_atomic_read_relaxed(&src64[1].value) - p_atomic_read_relaxed(&src64[0].value);
|
||||
}
|
||||
|
||||
if (timed_out)
|
||||
result = VK_ERROR_DEVICE_LOST;
|
||||
else if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
|
||||
result = VK_NOT_READY;
|
||||
|
||||
ms_prim_gen = p_atomic_read_relaxed(&src64[1].value) - p_atomic_read_relaxed(&src64[0].value);
|
||||
|
||||
if (flags & VK_QUERY_RESULT_64_BIT) {
|
||||
if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
|
||||
*(uint64_t *)dest = ms_prim_gen;
|
||||
|
|
@@ -1844,22 +1869,41 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||
pool->uses_gds && cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11);
|
||||
break;
|
||||
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||
for (unsigned i = 0; i < queryCount; i++) {
|
||||
unsigned query = firstQuery + i;
|
||||
uint64_t src_va = va + query * pool->stride;
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||
for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
|
||||
unsigned query = firstQuery + i;
|
||||
|
||||
radeon_check_space(cmd_buffer->device->ws, cs, 7 * 2);
|
||||
radeon_check_space(cmd_buffer->device->ws, cs, 7);
|
||||
|
||||
/* Wait on the upper word. */
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 12, 0x80000000, 0xffffffff);
|
||||
uint64_t avail_va = va + pool->availability_offset + 4 * query;
|
||||
|
||||
/* This waits on the ME. All copies below are done on the ME */
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
|
||||
}
|
||||
}
|
||||
}
|
||||
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
|
||||
pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset,
|
||||
pool->stride, stride, dst_size, queryCount, flags, 1 << 13,
|
||||
pool->availability_offset + 4 * firstQuery, false);
|
||||
} else {
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||
for (unsigned i = 0; i < queryCount; i++) {
|
||||
unsigned query = firstQuery + i;
|
||||
uint64_t src_va = va + query * pool->stride;
|
||||
|
||||
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.ms_prim_gen_query_pipeline, pool->bo,
|
||||
dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride,
|
||||
dst_size, queryCount, flags, 0, 0, false);
|
||||
radeon_check_space(cmd_buffer->device->ws, cs, 7 * 2);
|
||||
|
||||
/* Wait on the upper word. */
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 12, 0x80000000, 0xffffffff);
|
||||
}
|
||||
}
|
||||
|
||||
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.ms_prim_gen_query_pipeline, pool->bo,
|
||||
dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride,
|
||||
stride, dst_size, queryCount, flags, 0, 0, false);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
unreachable("trying to get results of unhandled query type");
|
||||
|
|
@@ -1898,7 +1942,9 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin
|
|||
flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo, radv_buffer_get_va(pool->bo) + firstQuery * pool->stride,
|
||||
queryCount * pool->stride, value);
|
||||
|
||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
|
||||
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT &&
|
||||
cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11)) {
|
||||
flush_bits |=
|
||||
radv_fill_buffer(cmd_buffer, NULL, pool->bo,
|
||||
radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4, queryCount * 4, 0);
|
||||
|
|
@@ -1914,6 +1960,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin
|
|||
VKAPI_ATTR void VKAPI_CALL
|
||||
radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
|
||||
|
||||
uint32_t value = query_clear_value(pool->vk.query_type);
|
||||
|
|
@@ -1923,7 +1970,9 @@ radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery
|
|||
for (uint32_t *p = data; p != data_end; ++p)
|
||||
*p = value;
|
||||
|
||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
|
||||
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT &&
|
||||
device->physical_device->rad_info.gfx_level >= GFX11)) {
|
||||
memset(pool->ptr + pool->availability_offset + firstQuery * 4, 0, queryCount * 4);
|
||||
}
|
||||
}
|
||||
|
|
@@ -2193,16 +2242,29 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
|
|||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
|
||||
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va);
|
||||
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
radeon_check_space(cmd_buffer->device->ws, cs, 4);
|
||||
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
++cmd_buffer->state.active_pipeline_queries;
|
||||
|
||||
if (!cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
radv_update_hw_pipelinestat(cmd_buffer);
|
||||
|
||||
cmd_buffer->state.active_prims_gen_gds_queries++;
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
} else {
|
||||
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va);
|
||||
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
|
||||
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
|
||||
if (!cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
||||
cmd_buffer->state.active_prims_gen_gds_queries++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
@@ -2381,13 +2443,34 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
|
|||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
|
||||
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8);
|
||||
radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device);
|
||||
|
||||
cmd_buffer->state.active_prims_gen_gds_queries--;
|
||||
radeon_check_space(cmd_buffer->device->ws, cs, 16);
|
||||
|
||||
if (!cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
cmd_buffer->state.active_pipeline_queries--;
|
||||
|
||||
radv_update_hw_pipelinestat(cmd_buffer);
|
||||
|
||||
va += pipelinestat_block_size;
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
|
||||
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
|
||||
avail_va, 1, cmd_buffer->gfx9_eop_bug_va);
|
||||
} else {
|
||||
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8);
|
||||
radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_prims_gen_gds_queries--;
|
||||
|
||||
if (!cmd_buffer->state.active_prims_gen_gds_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue