vc4: Emit semaphore instructions for new kernel ABI.

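Previously, the kernel would dispatch thread 0, wait, then dispatch thread 1. By insisting that the thread contents use semaphores in the right places (a VC4_PACKET_INCREMENT_SEMAPHORE before the final FLUSH of the bin CL, and a VC4_PACKET_WAIT_ON_SEMAPHORE in the render CL before the first branch into the tile lists), the kernel can dispatch both threads at once and sleep for longer.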
2025-12-31 14:10:09 +01:00 · 2014-11-18 12:16:55 -08:00 · 2014-11-18 12:16:55 -08:00 · 82e919d33b
commit 82e919d33b
parent 05f165b62d
3 changed files with 87 additions and 6 deletions
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@ -82,6 +82,8 @@ struct exec_info {
 	bool found_tile_binning_mode_config_packet;
 	bool found_tile_rendering_mode_config_packet;
 	bool found_start_tile_binning_packet;
+	bool found_increment_semaphore_packet;
+	bool found_wait_on_semaphore_packet;
 	uint8_t bin_tiles_x, bin_tiles_y;
 	uint32_t fb_width, fb_height;
 	uint32_t tile_alloc_init_block_size;
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@ -202,6 +202,18 @@ check_tex_size(struct exec_info *exec, struct drm_gem_cma_object *fbo,
 	return true;
 }

+static int
+validate_flush_all(VALIDATE_ARGS)
+{
+	if (exec->found_increment_semaphore_packet) {
+		DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
+			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int
 validate_start_tile_binning(VALIDATE_ARGS)
 {
@ -219,6 +231,41 @@ validate_start_tile_binning(VALIDATE_ARGS)
 	return 0;
 }

+static int
+validate_increment_semaphore(VALIDATE_ARGS)
+{
+	if (exec->found_increment_semaphore_packet) {
+		DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n");
+		return -EINVAL;
+	}
+	exec->found_increment_semaphore_packet = true;
+
+	/* After the semaphore increment there should be one FLUSH, then the
+	 * end of the command list.  The FLUSH actually triggers the increment,
+	 * so we only need to make sure no drawing or FLUSH_ALL follows it.
+	 */
+
+	return 0;
+}
+
+static int
+validate_wait_on_semaphore(VALIDATE_ARGS)
+{
+	if (exec->found_wait_on_semaphore_packet) {
+		DRM_ERROR("Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n");
+		return -EINVAL;
+	}
+	exec->found_wait_on_semaphore_packet = true;
+
+	if (!exec->found_increment_semaphore_packet) {
+		DRM_ERROR("VC4_PACKET_WAIT_ON_SEMAPHORE without "
+			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int
 validate_branch_to_sublist(VALIDATE_ARGS)
 {
@ -233,6 +280,11 @@ validate_branch_to_sublist(VALIDATE_ARGS)
 		return -EINVAL;
 	}

+	if (!exec->found_wait_on_semaphore_packet) {
+		DRM_ERROR("Jumping to tile alloc before binning finished.\n");
+		return -EINVAL;
+	}
+
 	offset = *(uint32_t *)(untrusted + 0);
 	if (offset % exec->tile_alloc_init_block_size ||
 	    offset / exec->tile_alloc_init_block_size >
@ -322,6 +374,11 @@ validate_indexed_prim_list(VALIDATE_ARGS)
 	uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
 	struct vc4_shader_state *shader_state;

+	if (exec->found_increment_semaphore_packet) {
+		DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
+		return -EINVAL;
+	}
+
 	/* Check overflow condition */
 	if (exec->shader_state_count == 0) {
 		DRM_ERROR("shader state must precede primitives\n");
@ -355,6 +412,11 @@ validate_gl_array_primitive(VALIDATE_ARGS)
 	uint32_t max_index;
 	struct vc4_shader_state *shader_state;

+	if (exec->found_increment_semaphore_packet) {
+		DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
+		return -EINVAL;
+	}
+
 	/* Check overflow condition */
 	if (exec->shader_state_count == 0) {
 		DRM_ERROR("shader state must precede primitives\n");
@ -600,10 +662,10 @@ static const struct cmd_info {
 	[VC4_PACKET_HALT] = { 1, 1, 1, "halt", NULL },
 	[VC4_PACKET_NOP] = { 1, 1, 1, "nop", NULL },
 	[VC4_PACKET_FLUSH] = { 1, 1, 1, "flush", NULL },
-	[VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", NULL },
+	[VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", validate_flush_all },
 	[VC4_PACKET_START_TILE_BINNING] = { 1, 0, 1, "start tile binning", validate_start_tile_binning },
-	[VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", NULL },
-	[VC4_PACKET_WAIT_ON_SEMAPHORE] = { 1, 1, 1, "wait on semaphore", NULL },
+	[VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", validate_increment_semaphore },
+	[VC4_PACKET_WAIT_ON_SEMAPHORE] = { 0, 1, 1, "wait on semaphore", validate_wait_on_semaphore },
 	/* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but
 	 * we only use it from the render CL in order to jump into the tile
 	 * allocation BO.
@ -737,6 +799,15 @@ vc4_validate_cl(struct drm_device *dev,
 			DRM_ERROR("Render CL missing VC4_PACKET_TILE_RENDERING_MODE_CONFIG\n");
 			return -EINVAL;
 		}
+
+		/* Make sure that they actually consumed the semaphore
+		 * increment from the bin CL.  Otherwise a later submit's
+		 * render CL would execute immediately.
+		 */
+		if (!exec->found_wait_on_semaphore_packet) {
+			DRM_ERROR("Render CL missing VC4_PACKET_WAIT_ON_SEMAPHORE\n");
+			return -EINVAL;
+		}
 		exec->ct1ea = exec->ct1ca + dst_offset;
 	}

--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@ -203,6 +203,12 @@ vc4_setup_rcl(struct vc4_context *vc4)
                         */
                        vc4_tile_coordinates(vc4, x, y, &coords_emitted);

+                        /* Wait for the binner before jumping to the first
+                         * tile's lists.
+                         */
+                        if (x == 0 && y == 0)
+                                cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
+
                        cl_start_reloc(&vc4->rcl, 1);
                        cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
                        cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
@ -269,12 +275,14 @@ vc4_flush(struct pipe_context *pctx)
        if (!vc4->needs_flush)
                return;

+        /* Increment the semaphore indicating that binning is done and
+         * unblocking the render thread.  Note that this doesn't act until the
+         * FLUSH completes.
+         */
+        cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
        /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
        cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);

-        cl_u8(&vc4->bcl, VC4_PACKET_NOP);
-        cl_u8(&vc4->bcl, VC4_PACKET_HALT);
-
        vc4_setup_rcl(vc4);

        if (vc4_debug & VC4_DEBUG_CL) {