mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-13 01:10:15 +01:00
radv: wait for occlusion queries in the resolve query shader
This is really noticeable for games that resolve a bunch of occlusion queries (in this case 4096) because it seems that emitting 4096 WAIT_REG_MEM packets can stall more than expected. Fixes this by waiting for queries in the resolve query shader. This improves performance of an unreleased game by +~10% (71->78 FPS). RADV should now be really close to Windows performance for that title. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22579>
This commit is contained in:
parent
1e6e3427f0
commit
d44651bfc3
1 changed files with 25 additions and 13 deletions
|
|
@ -139,6 +139,31 @@ build_occlusion_query_shader(struct radv_device *device)
|
|||
nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
|
||||
nir_store_var(&b, available, nir_imm_true(&b), 0x1);
|
||||
|
||||
nir_ssa_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
|
||||
nir_push_if(&b, query_result_wait);
|
||||
{
|
||||
/* Wait on the upper word of the last DB entry. */
|
||||
nir_push_loop(&b);
|
||||
{
|
||||
const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
|
||||
|
||||
/* Prevent the SSBO load to be moved out of the loop. */
|
||||
nir_scoped_memory_barrier(&b, NIR_SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);
|
||||
|
||||
nir_ssa_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
|
||||
nir_ssa_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4,
|
||||
.access = ACCESS_COHERENT);
|
||||
|
||||
nir_push_if(&b, nir_ige(&b, load, nir_imm_int(&b, 0x80000000)));
|
||||
{
|
||||
nir_jump(&b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
}
|
||||
nir_pop_loop(&b, NULL);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
|
||||
nir_push_loop(&b);
|
||||
|
||||
nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter);
|
||||
|
|
@ -1541,19 +1566,6 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
|||
|
||||
switch (pool->type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||
uint64_t enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask;
|
||||
uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
|
||||
for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
|
||||
unsigned query = firstQuery + i;
|
||||
uint64_t src_va = va + query * pool->stride + rb_avail_offset;
|
||||
|
||||
radeon_check_space(cmd_buffer->device->ws, cs, 7);
|
||||
|
||||
/* Waits on the upper word of the last DB entry */
|
||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff);
|
||||
}
|
||||
}
|
||||
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
|
||||
pool->bo, dst_buffer->bo, firstQuery * pool->stride,
|
||||
dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue