From 53d937c2e828aa08071fa0bca3a839dc064e5b64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= Date: Thu, 27 May 2021 10:59:07 +0200 Subject: [PATCH] v3d/simulator: use BFC/RFC registers to wait for bin/render to complete We were using the CT0CA (Control List Executor Current Address) and CT0EA (Control List Executor End Address) registers, but that would only wait for the CLE to reach the end of the list, while there could still be work pending in the rest of the pipeline. Even if that seems to work with the current simulator, the correct way to do that is using the BFC (Binning Mode Flush Count) and RFC (Rendering Mode Frame Count) registers instead. In fact, this would be needed with a newer simulator snapshot, in order to get the following CTS tests working: dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image.4_bit dEQP-VK.api.copy_and_blit.core.resolve_image.whole_array_image_one_region.4_bit dEQP-VK.api.copy_and_blit.core.resolve_image.whole_copy_before_resolving.4_bit dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail dEQP-VK.api.image_clearing.core.clear_color_image.1d.optimal.multiple_layers.r32g32_uint dEQP-VK.api.image_clearing.core.clear_color_image.1d.optimal.remaining_array_layers_twostep.r16_sint Reviewed-by: Juan A. 
Suarez Part-of: --- src/broadcom/simulator/v3dx_simulator.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c index fe564596d6f..662624ee745 100644 --- a/src/broadcom/simulator/v3dx_simulator.c +++ b/src/broadcom/simulator/v3dx_simulator.c @@ -456,6 +456,12 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, uint32_t gmp_ofs) { + int last_bfc = (V3D_READ(V3D_CLE_0_BFC) & + V3D_CLE_0_BFC_BMFCT_SET); + + int last_rfc = (V3D_READ(V3D_CLE_0_RFC) & + V3D_CLE_0_RFC_RMFCT_SET); + g_gmp_ofs = gmp_ofs; v3d_reload_gmp(v3d); @@ -479,8 +485,8 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, * scheduler implements this using the GPU scheduler blocking on the * bin fence completing. (We don't use HW semaphores). */ - while (V3D_READ(V3D_CLE_0_CT0CA) != - V3D_READ(V3D_CLE_0_CT0EA)) { + while ((V3D_READ(V3D_CLE_0_BFC) & + V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) { v3d_hw_tick(v3d); } @@ -489,10 +495,8 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start); V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end); - while (V3D_READ(V3D_CLE_0_CT1CA) != - V3D_READ(V3D_CLE_0_CT1EA) || - V3D_READ(V3D_CLE_1_CT1CA) != - V3D_READ(V3D_CLE_1_CT1EA)) { + while ((V3D_READ(V3D_CLE_0_RFC) & + V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) { v3d_hw_tick(v3d); } }