radv: fix waiting for occlusion queries on GFX6-8

Occlusion queries don't go through L2 on GFX6-8, and waiting properly
in shaders is more complicated to implement. Use the previous
WAIT_REG_MEM logic on these GPUs to fix this.

This fixes flickering in many games on GFX8.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8954
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9415
Fixes: d44651bfc3 ("radv: wait for occlusion queries in the resolve query shader")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28796>
(cherry picked from commit e18cc3b39b)
This commit is contained in:
Samuel Pitoiset 2024-04-18 08:26:22 +02:00 committed by Eric Engestrom
parent 2f6cec1ed6
commit e4cac5d357
3 changed files with 49 additions and 83 deletions

View file

@ -674,7 +674,7 @@
"description": "radv: fix waiting for occlusion queries on GFX6-8",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "d44651bfc3c8f6cb6aba8914f5bd21e440d69a4c",
"notes": null

View file

@ -61,6 +61,16 @@ radv_get_pipelinestat_query_size(struct radv_device *device)
return num_results * 8;
}
/* Report whether the occlusion query resolve shader may wait on query results itself.
 *
 * On GFX6-8, occlusion query writes don't go through L2. Waiting in a shader there would
 * require flushing caches before every read, or using MTYPE=3 (i.e. uncached) in the buffer
 * descriptor to bypass L2. The WAIT_REG_MEM logic is used on those GPUs instead because it
 * is easier to implement.
 *
 * Returns true when the GPU's gfx_level is GFX9 or newer, false otherwise.
 */
static bool
radv_occlusion_query_use_l2(const struct radv_physical_device *pdev)
{
   const bool is_gfx9_or_newer = pdev->rad_info.gfx_level >= GFX9;

   return is_gfx9_or_newer;
}
static void
radv_store_availability(nir_builder *b, nir_def *flags, nir_def *dst_buf, nir_def *offset, nir_def *value32)
{
@ -148,29 +158,31 @@ build_occlusion_query_shader(struct radv_device *device)
nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
nir_store_var(&b, available, nir_imm_true(&b), 0x1);
nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
nir_push_if(&b, query_result_wait);
{
/* Wait on the upper word of the last DB entry. */
nir_push_loop(&b);
if (radv_occlusion_query_use_l2(device->physical_device)) {
nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
nir_push_if(&b, query_result_wait);
{
const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
/* Prevent the SSBO load to be moved out of the loop. */
nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);
nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000));
/* Wait on the upper word of the last DB entry. */
nir_push_loop(&b);
{
nir_jump(&b, nir_jump_break);
const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
/* Prevent the SSBO load to be moved out of the loop. */
nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);
nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000));
{
nir_jump(&b, nir_jump_break);
}
nir_pop_if(&b, NULL);
}
nir_pop_if(&b, NULL);
nir_pop_loop(&b, NULL);
}
nir_pop_loop(&b, NULL);
nir_pop_if(&b, NULL);
}
nir_pop_if(&b, NULL);
nir_push_loop(&b);
@ -1667,6 +1679,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
struct radv_device *device = cmd_buffer->device;
struct radv_physical_device *pdev = device->physical_device;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
@ -1697,6 +1711,22 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
switch (pool->vk.query_type) {
case VK_QUERY_TYPE_OCCLUSION:
if (!radv_occlusion_query_use_l2(pdev)) {
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
uint64_t enabled_rb_mask = pdev->rad_info.enabled_rb_mask;
uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
unsigned query = firstQuery + i;
uint64_t src_va = va + query * pool->stride + rb_avail_offset;
radeon_check_space(device->ws, cs, 7);
/* Waits on the upper word of the last DB entry */
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff);
}
}
}
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline, pool->bo,
dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride,
dst_size, queryCount, flags, 0, 0, false);

View file

@ -15,8 +15,6 @@ spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@MS8,Fail
spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail
@ -321,69 +319,11 @@ dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth24_stencil8
dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth32f_stencil8,Fail
dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_2d_array,Fail
dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth32f_stencil8_2d_array,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.all,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.height,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.samples,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.size,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.width,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples0,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples1,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples2,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples3,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples4,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.1023x1023,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.1025x1025,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.127x127,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.127x15,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.129x127,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.129x129,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x15,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x511,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.17x17,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.1x1,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.2047x1025,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.2047x2047,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.255x255,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.257x257,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.31x31,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.33x33,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.3x3,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.511x127,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.511x511,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.513x513,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.63x63,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.65x65,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.0,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.1,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.10,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.11,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.12,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.13,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.14,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.15,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.2,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.3,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.4,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.5,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.6,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.7,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.8,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.9,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.1024x1024,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.1024x16,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.1024x256,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.1024x64,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.16x1024,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.16x256,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.16x64,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.256x1024,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.256x16,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.256x256,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.256x64,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.64x1024,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.64x16,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.64x256,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.64x64,Fail
dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth24_stencil8_cube_array,Fail
dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth32f_stencil8_cube_array,Fail
dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_cube_array,Fail
@ -429,10 +369,6 @@ spec@arb_depth_buffer_float@fbo-clear-formats,Fail
spec@arb_depth_buffer_float@fbo-clear-formats@GL_DEPTH32F_STENCIL8,Fail
spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled,Fail
spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled@GL_RGB565- swizzled- border color only,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@Basic,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@discard,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@fb resize,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@glViewport,Fail
spec@arb_sample_locations@test,Fail
spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail
spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail