From d44651bfc3c8f6cb6aba8914f5bd21e440d69a4c Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 19 Apr 2023 16:28:48 +0200 Subject: [PATCH] radv: wait for occlusion queries in the resolve query shader The per-query WAIT_REG_MEM wait is really noticeable for games that resolve a bunch of occlusion queries (4096 in this case), because emitting 4096 WAIT_REG_MEM packets seems to stall more than expected. Fix this by waiting for the queries in the resolve query shader instead. This improves performance of an unreleased game by ~10% (71->78 FPS). RADV should now be really close to Windows performance for that title. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_query.c | 38 ++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index ffdbdb85022..a13c6d6034c 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -139,6 +139,31 @@ build_occlusion_query_shader(struct radv_device *device) nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1); nir_store_var(&b, available, nir_imm_true(&b), 0x1); + nir_ssa_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT); + nir_push_if(&b, query_result_wait); + { + /* Wait on the upper word of the last DB entry. */ + nir_push_loop(&b); + { + const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4; + + /* Prevent the SSBO load to be moved out of the loop. 
*/ + nir_scoped_memory_barrier(&b, NIR_SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo); + + nir_ssa_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset); + nir_ssa_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, + .access = ACCESS_COHERENT); + + nir_push_if(&b, nir_ige(&b, load, nir_imm_int(&b, 0x80000000))); + { + nir_jump(&b, nir_jump_break); + } + nir_pop_if(&b, NULL); + } + nir_pop_loop(&b, NULL); + } + nir_pop_if(&b, NULL); + nir_push_loop(&b); nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter); @@ -1541,19 +1566,6 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - uint64_t enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask; - uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4; - for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) { - unsigned query = firstQuery + i; - uint64_t src_va = va + query * pool->stride + rb_avail_offset; - - radeon_check_space(cmd_buffer->device->ws, cs, 7); - - /* Waits on the upper word of the last DB entry */ - radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff); - } - } radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,