From 4192e01dcc49384404cc0b79eaeed5bacf8fc292 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 5 Oct 2023 12:21:49 +0200 Subject: [PATCH] radv: always write the sample positions when a new descriptor BO is created The handling of sample positions was completely broken, for example in the following scenario: - submit something which needs sample positions (this creates a new descriptor BO with sample positions) - submit something which needs the tess rings (this creates a new descriptor BO with tess rings but without sample positions, i.e. add_sample_positions would be FALSE) - submit something which needs sample positions again (this won't create a new descriptor BO because it incorrectly remembered that sample positions were set) Fix this by always writing the sample positions. This should fix the following flakes: - dEQP-VK.fragment_shading_barycentric.*.weights.pipeline_topology_dynamic.msaa_interpolate_at_sample.* - dEQP-VK.pipeline.fast_linked_library.multisample_interpolation.sample_interpolate_at_distinct_values.* - dEQP-VK.pipeline.fast_linked_library.multisample_interpolation.sample_interpolation_consistency.* - dEQP-VK.draw.renderpass.linear_interpolation.* - dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.* These flakes were extremely hard to reproduce! 
Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/ci/radv-navi10-aco-flakes.txt | 3 -- src/amd/ci/radv-navi21-aco-fails.txt | 2 -- src/amd/ci/radv-navi21-aco-flakes.txt | 11 ------- src/amd/ci/radv-navi21-llvm-fails.txt | 6 ---- src/amd/ci/radv-polaris10-aco-fails.txt | 3 -- src/amd/ci/radv-polaris10-aco-flakes.txt | 2 -- src/amd/ci/radv-raven-flakes.txt | 4 --- src/amd/ci/radv-renoir-aco-flakes.txt | 2 -- src/amd/ci/radv-vangogh-aco-fails.txt | 2 -- src/amd/ci/radv-vangogh-aco-flakes.txt | 10 ------- src/amd/ci/radv-vega10-aco-flakes.txt | 2 -- src/amd/vulkan/radv_queue.c | 37 ++++++++---------------- 12 files changed, 12 insertions(+), 72 deletions(-) diff --git a/src/amd/ci/radv-navi10-aco-flakes.txt b/src/amd/ci/radv-navi10-aco-flakes.txt index af305ac3c1f..2e90d586083 100644 --- a/src/amd/ci/radv-navi10-aco-flakes.txt +++ b/src/amd/ci/radv-navi10-aco-flakes.txt @@ -1,6 +1,3 @@ -dEQP-VK.draw.renderpass.linear_interpolation.offset_max_2_samples -dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.no_offset_2_samples -dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.offset_min_2_samples dEQP-VK.dynamic_rendering.basic.* dEQP-VK.dynamic_rendering.primary_cmd_buff.*resuming.* dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.contents_secondary_cmdbuffers diff --git a/src/amd/ci/radv-navi21-aco-fails.txt b/src/amd/ci/radv-navi21-aco-fails.txt index cf99b2750ee..ef888915269 100644 --- a/src/amd/ci/radv-navi21-aco-fails.txt +++ b/src/amd/ci/radv-navi21-aco-fails.txt @@ -1,4 +1,2 @@ # New CTS failures in 1.3.6.3 -dEQP-VK.fragment_shading_barycentric.weights.pipeline_topology_dynamic.msaa_interpolate_at_sample.triangle_fan.noperspective,Fail -dEQP-VK.fragment_shading_barycentric.weights.pipeline_topology_dynamic.msaa_interpolate_at_sample.triangle_fan.perspective,Fail dEQP-VK.fragment_shading_rate.renderpass2.monolithic.attachment_rate.misc.ro_ds_stencil_read_only_optimal,Crash diff --git a/src/amd/ci/radv-navi21-aco-flakes.txt 
b/src/amd/ci/radv-navi21-aco-flakes.txt index 213dc8c5d9a..b6e62fed8ad 100644 --- a/src/amd/ci/radv-navi21-aco-flakes.txt +++ b/src/amd/ci/radv-navi21-aco-flakes.txt @@ -1,5 +1,3 @@ -dEQP-VK.draw.dynamic_rendering.complete_secondary_cmd_buff.linear_interpolation.offset_max_2_samples -dEQP-VK.draw.renderpass.linear_interpolation.no_offset_\d_samples dEQP-VK.dynamic_rendering.basic.* dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.* dEQP-VK.pipeline.monolithic.image.suballocation.sampling_type.separate.view_type.2d.format.a2b10g10r10_uint_pack32.count_4.size.13x23 @@ -8,17 +6,8 @@ dEQP-VK.pipeline.monolithic.image.suballocation.sampling_type.combined.view_type # Came with the split of the test lists in CI dEQP-VK.rasterization.primitives_multisample_2_bit.static_stipple.bresenham_line_strip_wide -# This fail is 100% reproducible when running vkcts in parallel mode, -# but isn't reproducible when running with the normal full test list. -# Given that we want both CI and developers to be able to get clean -# runs, let's mark this test as a flake until it gets fixed. 
-# -# See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7220 -dEQP-VK.draw.renderpass.linear_interpolation.no_offset_\d_samples dEQP-VK.draw.renderpass.multi_draw.mosaic.indexed_mixed.max_draws.stride_extra_12.2_instances_base_3.with_tess.multiview.offset_6 -dEQP-VK.pipeline.fast_linked_library.multisample_interpolation.sample_interpolate_at_ignores_centroid.128_128_1.samples_8 - dEQP-VK.mesh_shader.ext.query.all_stats_query.triangles.host_reset.copy.wait.indirect_draw.32bit.with_availability.single_block.task_mesh.inside_rp.single_view.only_primary # Too many of these tests to follow, so let's cast a broad net diff --git a/src/amd/ci/radv-navi21-llvm-fails.txt b/src/amd/ci/radv-navi21-llvm-fails.txt index de7e83135d9..15922d1f7cd 100644 --- a/src/amd/ci/radv-navi21-llvm-fails.txt +++ b/src/amd/ci/radv-navi21-llvm-fails.txt @@ -5,12 +5,6 @@ dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.tanh_ dEQP-VK.draw.renderpass.shader_invocation.helper_invocation,Fail -dEQP-VK.fragment_shading_barycentric.weights.pipeline_topology_dynamic.msaa_interpolate_at_offset.triangle_fan.perspective,Fail -dEQP-VK.fragment_shading_barycentric.weights.pipeline_topology_dynamic.msaa_interpolate_at_offset.triangle_list.perspective,Fail -dEQP-VK.fragment_shading_barycentric.weights.pipeline_topology_dynamic.msaa_interpolate_at_offset.triangle_list_with_adjacency.perspective,Fail -dEQP-VK.fragment_shading_barycentric.weights.pipeline_topology_dynamic.msaa_interpolate_at_offset.triangle_strip.perspective,Fail -dEQP-VK.fragment_shading_barycentric.weights.pipeline_topology_dynamic.msaa_interpolate_at_offset.triangle_strip_with_adjacency.perspective,Fail - dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r32g32b32a32_sfloat.samples_8,Fail dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_1.r8g8b8a8_unorm.samples_8,Fail dEQP-VK.pipeline.monolithic.multisample.storage_image.64x64_4.r32g32b32a32_sfloat.samples_8,Fail diff --git 
a/src/amd/ci/radv-polaris10-aco-fails.txt b/src/amd/ci/radv-polaris10-aco-fails.txt index 8acf51742c7..7864a790e80 100644 --- a/src/amd/ci/radv-polaris10-aco-fails.txt +++ b/src/amd/ci/radv-polaris10-aco-fails.txt @@ -17,6 +17,3 @@ dEQP-VK.texture.mipmap.3d.image_view_min_lod.base_level.nearest_nearest_integer_ dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.linear_nearest,Fail dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_linear,Fail dEQP-VK.texture.mipmap.cubemap.image_view_min_lod.base_level.nearest_nearest,Fail - -dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.offset_min_2_samples,Fail -dEQP-VK.draw.renderpass.linear_interpolation.offset_max_2_samples,Fail diff --git a/src/amd/ci/radv-polaris10-aco-flakes.txt b/src/amd/ci/radv-polaris10-aco-flakes.txt index 4eb9d6fe7bf..e7657a3ba5c 100644 --- a/src/amd/ci/radv-polaris10-aco-flakes.txt +++ b/src/amd/ci/radv-polaris10-aco-flakes.txt @@ -1,3 +1 @@ dEQP-VK.dynamic_rendering.basic.* -dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.offset_min_2_samples -dEQP-VK.draw.renderpass.linear_interpolation.offset_max_2_samples diff --git a/src/amd/ci/radv-raven-flakes.txt b/src/amd/ci/radv-raven-flakes.txt index e8fc64274f7..c1cb913e211 100644 --- a/src/amd/ci/radv-raven-flakes.txt +++ b/src/amd/ci/radv-raven-flakes.txt @@ -1,6 +1,2 @@ -dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.offset_max_2_samples -dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.offset_max_8_samples -dEQP-VK.draw.dynamic_rendering.complete_secondary_cmd_buff.linear_interpolation.no_offset_4_samples -dEQP-VK.draw.dynamic_rendering.complete_secondary_cmd_buff.linear_interpolation.offset_max_4_samples # since Debian 12 CI uprev dEQP-VK.pipeline.pipeline_library.framebuffer_attachment.diff_attachments_2d_32x32_39x41_ms diff --git a/src/amd/ci/radv-renoir-aco-flakes.txt b/src/amd/ci/radv-renoir-aco-flakes.txt index 
78077daaca7..4fd38f761d1 100644 --- a/src/amd/ci/radv-renoir-aco-flakes.txt +++ b/src/amd/ci/radv-renoir-aco-flakes.txt @@ -1,10 +1,8 @@ dEQP-VK.dynamic_rendering.basic.* dEQP-VK.reconvergence.workgroup_uniform_control_flow_elect.compute.nesting4.2.35 -dEQP-VK.draw.renderpass.linear_interpolation.offset_max_2_samples dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.2_cmdbuffers_resuming dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.2_secondary_cmdbuffers_resuming dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.2_secondary_2_primary_cmdbuffers_resuming -dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.offset_min_2_samples # https://gitlab.freedesktop.org/mesa/mesa/-/issues/8817 dEQP-VK.*framebuffer_attachment.diff_attachments.* diff --git a/src/amd/ci/radv-vangogh-aco-fails.txt b/src/amd/ci/radv-vangogh-aco-fails.txt index 13fb44e0292..ab4c4c1b996 100644 --- a/src/amd/ci/radv-vangogh-aco-fails.txt +++ b/src/amd/ci/radv-vangogh-aco-fails.txt @@ -1,5 +1,3 @@ -dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.no_offset_2_samples,Fail - # New CTS failures in 1.3.6.3 dEQP-VK.api.copy_and_blit.copy_commands2.image_to_buffer.2d_images.mip_copies_bc1_rgb_unorm_block_64x192_5_layers_compute,Crash dEQP-VK.api.copy_and_blit.copy_commands2.image_to_buffer.2d_images.mip_copies_bc1_rgb_unorm_block_64x192_5_layers_universal,Crash diff --git a/src/amd/ci/radv-vangogh-aco-flakes.txt b/src/amd/ci/radv-vangogh-aco-flakes.txt index 85fee0b108b..e81e23723e2 100644 --- a/src/amd/ci/radv-vangogh-aco-flakes.txt +++ b/src/amd/ci/radv-vangogh-aco-flakes.txt @@ -4,14 +4,4 @@ dEQP-VK.dynamic_rendering.basic.* dEQP-VK.dynamic_rendering.primary_cmd_buff.basic.* dEQP-VK.draw.dynamic_rendering.* -# This fail is 100% reproducible when running vkcts in parallel mode, -# but isn't reproducible when running with the normal full test list. 
-# Given that we want both CI and developers to be able to get clean -# runs, let's mark this test as a flake until it gets fixed. -# -# See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7220 -dEQP-VK.draw.renderpass.linear_interpolation.no_offset_2_samples -dEQP-VK.draw.renderpass.linear_interpolation.no_offset_4_samples -dEQP-VK.draw.renderpass.linear_interpolation.no_offset_8_samples - dEQP-VK.pipeline.monolithic.image.suballocation.sampling_type.separate.view_type.2d_array.format.r4g4b4a4_unorm_pack16.count_4.size.32x16_array_of_6 diff --git a/src/amd/ci/radv-vega10-aco-flakes.txt b/src/amd/ci/radv-vega10-aco-flakes.txt index 76a5dd3b6bf..f07af5bbab1 100644 --- a/src/amd/ci/radv-vega10-aco-flakes.txt +++ b/src/amd/ci/radv-vega10-aco-flakes.txt @@ -1,8 +1,6 @@ dEQP-VK.dynamic_rendering.basic.* dEQP-VK.dynamic_rendering.primary.* -dEQP-VK.draw.renderpass.linear_interpolation.offset_max_2_samples -dEQP-VK.draw.dynamic_rendering.primary_cmd_buff.linear_interpolation.offset_min_2_samples dEQP-VK.multiview.renderpass2.multisample.8_1_1_8 dEQP-VK.multiview.renderpass2.multisample.8 dEQP-VK.multiview.renderpass2.multisample.15_15_15_15 diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index b0a9a34c9ad..f89e2306932 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -270,7 +270,7 @@ radv_queue_submit_empty(struct radv_queue *queue, struct vk_queue_submit *submis } static void -radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sample_positions, uint32_t esgs_ring_size, +radv_fill_shader_rings(struct radv_device *device, uint32_t *map, uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo, uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, struct radeon_winsys_bo *tess_rings_bo, struct radeon_winsys_bo *task_rings_bo, struct radeon_winsys_bo *mesh_scratch_ring_bo, @@ -495,16 +495,14 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl desc += 
4; - if (add_sample_positions) { - /* add sample positions after all rings */ - memcpy(desc, device->sample_locations_1x, 8); - desc += 2; - memcpy(desc, device->sample_locations_2x, 16); - desc += 4; - memcpy(desc, device->sample_locations_4x, 32); - desc += 8; - memcpy(desc, device->sample_locations_8x, 64); - } + /* add sample positions after all rings */ + memcpy(desc, device->sample_locations_1x, 8); + desc += 2; + memcpy(desc, device->sample_locations_2x, 16); + desc += 4; + memcpy(desc, device->sample_locations_4x, 32); + desc += 8; + memcpy(desc, device->sample_locations_8x, 64); } static void @@ -965,15 +963,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi tess_rings_bo != queue->tess_rings_bo || task_rings_bo != queue->task_rings_bo || mesh_scratch_ring_bo != queue->mesh_scratch_ring_bo || attr_ring_bo != queue->attr_ring_bo || add_sample_positions) { - uint32_t size = 0; - if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || attr_ring_bo || - add_sample_positions) { - size = 176; /* 2 dword + 2 padding + 4 dword * 10 */ - if (add_sample_positions) - size += 128; /* 64+32+16+8 = 120 bytes */ - } else if (scratch_bo) { - size = 8; /* 2 dword */ - } + const uint32_t size = 304; result = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY, @@ -1000,11 +990,8 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi map[1] = rsrc1; } - if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || attr_ring_bo || - add_sample_positions) - radv_fill_shader_rings(device, map, add_sample_positions, needs->esgs_ring_size, esgs_ring_bo, - needs->gsvs_ring_size, gsvs_ring_bo, tess_rings_bo, task_rings_bo, mesh_scratch_ring_bo, - needs->attr_ring_size, attr_ring_bo); + radv_fill_shader_rings(device, map, needs->esgs_ring_size, esgs_ring_bo, 
needs->gsvs_ring_size, gsvs_ring_bo, + tess_rings_bo, task_rings_bo, mesh_scratch_ring_bo, needs->attr_ring_size, attr_ring_bo); ws->buffer_unmap(descriptor_bo); }