From e4cac5d357510e903646911fd21eb8d2bd7004a5 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 18 Apr 2024 08:26:22 +0200 Subject: [PATCH] radv: fix waiting for occlusion queries on GFX6-8 Occlusion queries don't go through L2 on GFX6-8, and waiting properly in shaders is more complicated to implement. Use the previous WAIT_REG_MEM logic on these GPUs to fix this. This fixes flickering on many games on GFX8. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8954 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9415 Fixes: d44651bfc3c ("radv: wait for occlusion queries in the resolve query shader") Signed-off-by: Samuel Pitoiset Part-of: (cherry picked from commit e18cc3b39b118644ef1f7cccdca72e5e6f1a0519) --- .pick_status.json | 2 +- src/amd/vulkan/radv_query.c | 66 ++++++++++++++----- .../zink/ci/zink-radv-polaris10-fails.txt | 64 ------------------ 3 files changed, 49 insertions(+), 83 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 5b824fd80be..fa2b00edd78 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -674,7 +674,7 @@ "description": "radv: fix waiting for occlusion queries on GFX6-8", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "d44651bfc3c8f6cb6aba8914f5bd21e440d69a4c", "notes": null diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index df50f969f5e..bbd3ba6dbce 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -61,6 +61,16 @@ radv_get_pipelinestat_query_size(struct radv_device *device) return num_results * 8; } +static bool +radv_occlusion_query_use_l2(const struct radv_physical_device *pdev) +{ + /* Occlusion query writes don't go through L2 on GFX6-8 which means the driver would need to + * flush caches before every read in shaders or use MTYPE=3 (ie. uncached) in the buffer + * descriptor to bypass L2. Use the WAIT_REG_MEM logic instead which is easier to implement. + */ + return pdev->rad_info.gfx_level >= GFX9; +} + static void radv_store_availability(nir_builder *b, nir_def *flags, nir_def *dst_buf, nir_def *offset, nir_def *value32) { @@ -148,29 +158,31 @@ build_occlusion_query_shader(struct radv_device *device) nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1); nir_store_var(&b, available, nir_imm_true(&b), 0x1); - nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT); - nir_push_if(&b, query_result_wait); - { - /* Wait on the upper word of the last DB entry. */ - nir_push_loop(&b); + if (radv_occlusion_query_use_l2(device->physical_device)) { + nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT); + nir_push_if(&b, query_result_wait); { - const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4; - - /* Prevent the SSBO load to be moved out of the loop. */ - nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo); - - nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset); - nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT); - - nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000)); + /* Wait on the upper word of the last DB entry. */ + nir_push_loop(&b); { - nir_jump(&b, nir_jump_break); + const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4; + + /* Prevent the SSBO load to be moved out of the loop. */ + nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo); + + nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset); + nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT); + + nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000)); + { + nir_jump(&b, nir_jump_break); + } + nir_pop_if(&b, NULL); } - nir_pop_if(&b, NULL); + nir_pop_loop(&b, NULL); } - nir_pop_loop(&b, NULL); + nir_pop_if(&b, NULL); } - nir_pop_if(&b, NULL); nir_push_loop(&b); @@ -1667,6 +1679,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); + struct radv_device *device = cmd_buffer->device; + struct radv_physical_device *pdev = device->physical_device; struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo); @@ -1697,6 +1711,22 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo switch (pool->vk.query_type) { case VK_QUERY_TYPE_OCCLUSION: + if (!radv_occlusion_query_use_l2(pdev)) { + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + uint64_t enabled_rb_mask = pdev->rad_info.enabled_rb_mask; + uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4; + for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) { + unsigned query = firstQuery + i; + uint64_t src_va = va + query * pool->stride + rb_avail_offset; + + radeon_check_space(device->ws, cs, 7); + + /* Waits on the upper word of the last DB entry */ + radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff); + } + } + } + radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, queryCount, flags, 0, 0, false); diff --git a/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt index b60b9570c4d..b7fa3416124 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-polaris10-fails.txt @@ -15,8 +15,6 @@ spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash -spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query,Fail -spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@MS8,Fail spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail @@ -321,69 +319,11 @@ dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth24_stencil8 dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth32f_stencil8,Fail dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_2d_array,Fail dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth32f_stencil8_2d_array,Fail -dEQP-GLES31.functional.fbo.no_attachments.maximums.all,Fail -dEQP-GLES31.functional.fbo.no_attachments.maximums.height,Fail -dEQP-GLES31.functional.fbo.no_attachments.maximums.samples,Fail -dEQP-GLES31.functional.fbo.no_attachments.maximums.size,Fail -dEQP-GLES31.functional.fbo.no_attachments.maximums.width,Fail -dEQP-GLES31.functional.fbo.no_attachments.multisample.samples0,Fail -dEQP-GLES31.functional.fbo.no_attachments.multisample.samples1,Fail -dEQP-GLES31.functional.fbo.no_attachments.multisample.samples2,Fail -dEQP-GLES31.functional.fbo.no_attachments.multisample.samples3,Fail -dEQP-GLES31.functional.fbo.no_attachments.multisample.samples4,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.1023x1023,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.1025x1025,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.127x127,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.127x15,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.129x127,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.129x129,Fail dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x15,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x511,Fail dEQP-GLES31.functional.fbo.no_attachments.npot_size.17x17,Fail dEQP-GLES31.functional.fbo.no_attachments.npot_size.1x1,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.2047x1025,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.2047x2047,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.255x255,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.257x257,Fail dEQP-GLES31.functional.fbo.no_attachments.npot_size.31x31,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.33x33,Fail dEQP-GLES31.functional.fbo.no_attachments.npot_size.3x3,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.511x127,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.511x511,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.513x513,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.63x63,Fail -dEQP-GLES31.functional.fbo.no_attachments.npot_size.65x65,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.0,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.1,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.10,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.11,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.12,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.13,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.14,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.15,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.2,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.3,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.4,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.5,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.6,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.7,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.8,Fail -dEQP-GLES31.functional.fbo.no_attachments.random.9,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.1024x1024,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.1024x16,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.1024x256,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.1024x64,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.16x1024,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.16x256,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.16x64,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.256x1024,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.256x16,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.256x256,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.256x64,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.64x1024,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.64x16,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.64x256,Fail -dEQP-GLES31.functional.fbo.no_attachments.size.64x64,Fail dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth24_stencil8_cube_array,Fail dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth32f_stencil8_cube_array,Fail dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_cube_array,Fail @@ -429,10 +369,6 @@ spec@arb_depth_buffer_float@fbo-clear-formats,Fail spec@arb_depth_buffer_float@fbo-clear-formats@GL_DEPTH32F_STENCIL8,Fail spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled,Fail spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled@GL_RGB565- swizzled- border color only,Fail -spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@Basic,Fail -spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@discard,Fail -spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@fb resize,Fail -spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@glViewport,Fail spec@arb_sample_locations@test,Fail spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail