diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c
index aae730b0cbd..939e828d475 100644
--- a/src/gallium/drivers/iris/iris_batch.c
+++ b/src/gallium/drivers/iris/iris_batch.c
@@ -266,8 +266,15 @@ ensure_exec_obj_space(struct iris_batch *batch, uint32_t count)
 static void
 add_bo_to_batch(struct iris_batch *batch, struct iris_bo *bo, bool writable)
 {
+   uint64_t extra_flags = 0;
+
    assert(batch->exec_array_size > batch->exec_count);
 
+   if (writable)
+      extra_flags |= EXEC_OBJECT_WRITE;
+   if (!iris_bo_is_external(bo))
+      extra_flags |= EXEC_OBJECT_ASYNC;
+
    iris_bo_reference(bo);
 
    batch->exec_bos[batch->exec_count] = bo;
@@ -276,7 +283,7 @@ add_bo_to_batch(struct iris_batch *batch, struct iris_bo *bo, bool writable)
       (struct drm_i915_gem_exec_object2) {
          .handle = bo->gem_handle,
          .offset = bo->address,
-         .flags = bo->kflags | (writable ? EXEC_OBJECT_WRITE : 0),
+         .flags = bo->kflags | extra_flags,
       };
 
    bo->index = batch->exec_count;
@@ -346,12 +353,8 @@ iris_use_pinned_bo(struct iris_batch *batch,
        * we want to avoid synchronizing in this case.
        */
       if (other_entry &&
-          ((other_entry->flags & EXEC_OBJECT_WRITE) || writable)) {
+          ((other_entry->flags & EXEC_OBJECT_WRITE) || writable))
          iris_batch_flush(batch->other_batches[b]);
-         iris_batch_add_syncobj(batch,
-                                batch->other_batches[b]->last_fence->syncobj,
-                                I915_EXEC_FENCE_WAIT);
-      }
    }
 }
 
@@ -627,6 +630,123 @@ iris_batch_check_for_reset(struct iris_batch *batch)
    return status;
 }
 
+static void
+move_syncobj_to_batch(struct iris_batch *batch,
+                      struct iris_syncobj **p_syncobj,
+                      unsigned flags)
+{
+   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
+
+   if (!*p_syncobj)
+      return;
+
+   bool found = false;
+   util_dynarray_foreach(&batch->syncobjs, struct iris_syncobj *, s) {
+      if (*p_syncobj == *s) {
+         found = true;
+         break;
+      }
+   }
+
+   if (!found)
+      iris_batch_add_syncobj(batch, *p_syncobj, flags);
+
+   iris_syncobj_reference(bufmgr, p_syncobj, NULL);
+}
+
+static void
+update_bo_syncobjs(struct iris_batch *batch, struct iris_bo *bo, bool write)
+{
+   struct iris_screen *screen = batch->screen;
+   struct iris_bufmgr *bufmgr = screen->bufmgr;
+
+   /* Make sure bo->deps is big enough */
+   if (screen->id >= bo->deps_size) {
+      int new_size = screen->id + 1;
+      bo->deps = realloc(bo->deps, new_size * sizeof(bo->deps[0]));
+      memset(&bo->deps[bo->deps_size], 0,
+             sizeof(bo->deps[0]) * (new_size - bo->deps_size));
+
+      bo->deps_size = new_size;
+   }
+
+   /* When it comes to execbuf submission of non-shared buffers, we only need
+    * to care about the reads and writes done by the other batches of our own
+    * screen, and we also don't care about the reads and writes done by our
+    * own batch, although we need to track them. Note that other places in
+    * our code may need to care about all the operations done by every batch
+    * on every screen.
+    */
+   struct iris_bo_screen_deps *deps = &bo->deps[screen->id];
+   int batch_idx = batch->name;
+
+#if IRIS_BATCH_COUNT == 2
+   /* Due to the above, we exploit the fact that IRIS_BATCH_COUNT is actually
+    * 2, which means there's only one other batch we need to care about.
+    */
+   int other_batch_idx = 1 - batch_idx;
+#else
+   /* For IRIS_BATCH_COUNT == 3 we can do:
+    *   int other_batch_idxs[IRIS_BATCH_COUNT - 1] = {
+    *      (batch_idx ^ 1) & 1,
+    *      (batch_idx ^ 2) & 2,
+    *   };
+    * For IRIS_BATCH_COUNT == 4 we can do:
+    *   int other_batch_idxs[IRIS_BATCH_COUNT - 1] = {
+    *      (batch_idx + 1) & 3,
+    *      (batch_idx + 2) & 3,
+    *      (batch_idx + 3) & 3,
+    *   };
+    */
+#error "Implement me."
+#endif
+
+   /* If it is being written to by others, wait on it. */
+   if (deps->write_syncobjs[other_batch_idx])
+      move_syncobj_to_batch(batch, &deps->write_syncobjs[other_batch_idx],
+                            I915_EXEC_FENCE_WAIT);
+
+   struct iris_syncobj *batch_syncobj = iris_batch_get_signal_syncobj(batch);
+
+   if (write) {
+      /* If we're writing to it, set our batch's syncobj as write_syncobj so
+       * others can wait on us. Also wait on every reader we care about before
+       * writing.
+       */
+      iris_syncobj_reference(bufmgr, &deps->write_syncobjs[batch_idx],
+                             batch_syncobj);
+
+      move_syncobj_to_batch(batch, &deps->read_syncobjs[other_batch_idx],
+                            I915_EXEC_FENCE_WAIT);
+
+   } else {
+      /* If we're only reading, replace the read syncobj for our batch index. */
+      iris_syncobj_reference(bufmgr, &deps->read_syncobjs[batch_idx],
+                             batch_syncobj);
+   }
+}
+
+static void
+update_batch_syncobjs(struct iris_batch *batch)
+{
+   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
+   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);
+
+   simple_mtx_lock(bo_deps_lock);
+
+   for (int i = 0; i < batch->exec_count; i++) {
+      struct iris_bo *bo = batch->exec_bos[i];
+      struct drm_i915_gem_exec_object2 *exec_obj = &batch->validation_list[i];
+      bool write = exec_obj->flags & EXEC_OBJECT_WRITE;
+
+      if (bo == batch->screen->workaround_bo)
+         continue;
+
+      update_bo_syncobjs(batch, bo, write);
+   }
+   simple_mtx_unlock(bo_deps_lock);
+}
+
 /**
  * Submit the batch to the GPU via execbuffer2.
  */
@@ -711,6 +831,8 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
 
    iris_finish_batch(batch);
 
+   update_batch_syncobjs(batch);
+
    if (INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL)) {
       const char *basefile = strstr(file, "iris/");
       if (basefile)
diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h
index cd7de5221c9..68617d0a6be 100644
--- a/src/gallium/drivers/iris/iris_batch.h
+++ b/src/gallium/drivers/iris/iris_batch.h
@@ -56,8 +56,6 @@ enum iris_batch_name {
    IRIS_BATCH_COMPUTE,
 };
 
-#define IRIS_BATCH_COUNT 2
-
 struct iris_batch {
    struct iris_context *ice;
    struct iris_screen *screen;
diff --git a/src/gallium/drivers/iris/iris_bufmgr.c b/src/gallium/drivers/iris/iris_bufmgr.c
index 52a71259283..08f88032bf0 100644
--- a/src/gallium/drivers/iris/iris_bufmgr.c
+++ b/src/gallium/drivers/iris/iris_bufmgr.c
@@ -181,6 +181,7 @@ struct iris_bufmgr {
    int fd;
 
    simple_mtx_t lock;
+   simple_mtx_t bo_deps_lock;
 
    /** Array of lists of cached gem objects of power-of-two sizes */
    struct bo_cache_bucket cache_bucket[14 * 4];
@@ -381,20 +382,100 @@ vma_free(struct iris_bufmgr *bufmgr,
    util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
 }
 
-int
-iris_bo_busy(struct iris_bo *bo)
+static bool
+iris_bo_busy_gem(struct iris_bo *bo)
 {
    struct iris_bufmgr *bufmgr = bo->bufmgr;
    struct drm_i915_gem_busy busy = { .handle = bo->gem_handle };
 
    int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
    if (ret == 0) {
-      bo->idle = !busy.busy;
       return busy.busy;
    }
    return false;
 }
 
+/* A timeout of 0 just checks for busyness. */
+static int
+iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
+{
+   int ret = 0;
+   struct iris_bufmgr *bufmgr = bo->bufmgr;
+
+   /* If we know it's idle, don't bother with the kernel round trip */
+   if (bo->idle)
+      return 0;
+
+   simple_mtx_lock(&bufmgr->bo_deps_lock);
+
+   uint32_t handles[bo->deps_size * IRIS_BATCH_COUNT * 2];
+   int handle_count = 0;
+
+   for (int d = 0; d < bo->deps_size; d++) {
+      for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
+         struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
+         struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
+         if (r)
+            handles[handle_count++] = r->handle;
+         if (w)
+            handles[handle_count++] = w->handle;
+      }
+   }
+
+   if (handle_count == 0)
+      goto out;
+
+   /* Unlike the gem wait, negative values are not infinite here. */
+   int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
+   if (timeout_abs < 0)
+      timeout_abs = INT64_MAX;
+
+   struct drm_syncobj_wait args = {
+      .handles = (uintptr_t) handles,
+      .timeout_nsec = timeout_abs,
+      .count_handles = handle_count,
+      .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
+   };
+
+   ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
+   if (ret != 0) {
+      ret = -errno;
+      goto out;
+   }
+
+   /* We just waited on everything, so clear all the deps. */
+   for (int d = 0; d < bo->deps_size; d++) {
+      for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
+         iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
+         iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
+      }
+   }
+
+out:
+   simple_mtx_unlock(&bufmgr->bo_deps_lock);
+   return ret;
+}
+
+static bool
+iris_bo_busy_syncobj(struct iris_bo *bo)
+{
+   return iris_bo_wait_syncobj(bo, 0) == -ETIME;
+}
+
+bool
+iris_bo_busy(struct iris_bo *bo)
+{
+   bool busy;
+   if (iris_bo_is_external(bo))
+      busy = iris_bo_busy_gem(bo);
+   else
+      busy = iris_bo_busy_syncobj(bo);
+
+   bo->idle = !busy;
+
+   return busy;
+}
+
 int
 iris_bo_madvise(struct iris_bo *bo, int state)
 {
@@ -865,6 +946,14 @@ bo_close(struct iris_bo *bo)
    /* Return the VMA for reuse */
    vma_free(bo->bufmgr, bo->address, bo->size);
 
+   for (int d = 0; d < bo->deps_size; d++) {
+      for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
+         iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
+         iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
+      }
+   }
+   free(bo->deps);
+
    free(bo);
 }
 
@@ -1149,6 +1238,22 @@ iris_bo_wait_rendering(struct iris_bo *bo)
    iris_bo_wait(bo, -1);
 }
 
+static int
+iris_bo_wait_gem(struct iris_bo *bo, int64_t timeout_ns)
+{
+   struct iris_bufmgr *bufmgr = bo->bufmgr;
+   struct drm_i915_gem_wait wait = {
+      .bo_handle = bo->gem_handle,
+      .timeout_ns = timeout_ns,
+   };
+
+   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
+   if (ret != 0)
+      return -errno;
+
+   return 0;
+}
+
 /**
  * Waits on a BO for the given amount of time.
  *
@@ -1179,17 +1284,13 @@ iris_bo_wait_rendering(struct iris_bo *bo)
 int
 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
 {
-   struct iris_bufmgr *bufmgr = bo->bufmgr;
+   int ret;
 
-   /* If we know it's idle, don't bother with the kernel round trip */
-   if (bo->idle && !iris_bo_is_external(bo))
-      return 0;
+   if (iris_bo_is_external(bo))
+      ret = iris_bo_wait_gem(bo, timeout_ns);
+   else
+      ret = iris_bo_wait_syncobj(bo, timeout_ns);
 
-   struct drm_i915_gem_wait wait = {
-      .bo_handle = bo->gem_handle,
-      .timeout_ns = timeout_ns,
-   };
-   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
    if (ret != 0)
       return -errno;
 
@@ -1208,6 +1309,7 @@ iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
    bufmgr->aux_map_ctx = NULL;
 
    simple_mtx_destroy(&bufmgr->lock);
+   simple_mtx_destroy(&bufmgr->bo_deps_lock);
 
    /* Free any cached buffer objects we were going to reuse */
    for (int i = 0; i < bufmgr->num_buckets; i++) {
@@ -1786,6 +1888,7 @@ iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
    p_atomic_set(&bufmgr->refcount, 1);
 
    simple_mtx_init(&bufmgr->lock, mtx_plain);
+   simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);
 
    list_inithead(&bufmgr->zombie_list);
 
@@ -1924,3 +2027,9 @@ iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
 {
    return bufmgr->aux_map_ctx;
 }
+
+simple_mtx_t *
+iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
+{
+   return &bufmgr->bo_deps_lock;
+}
diff --git a/src/gallium/drivers/iris/iris_bufmgr.h b/src/gallium/drivers/iris/iris_bufmgr.h
index 226168dd28c..3c2da0e06a6 100644
--- a/src/gallium/drivers/iris/iris_bufmgr.h
+++ b/src/gallium/drivers/iris/iris_bufmgr.h
@@ -31,13 +31,15 @@
 #include "c11/threads.h"
 #include "util/macros.h"
 #include "util/u_atomic.h"
+#include "util/u_dynarray.h"
 #include "util/list.h"
+#include "util/simple_mtx.h"
 #include "pipe/p_defines.h"
 
-struct iris_batch;
 struct intel_device_info;
 struct pipe_debug_callback;
 struct isl_surf;
+struct iris_syncobj;
 
 /**
  * Memory zones. When allocating a buffer, you can request that it is
@@ -129,6 +131,13 @@ enum iris_mmap_mode {
    IRIS_MMAP_WB, /**< Write-back mapping with CPU caches enabled */
 };
 
+#define IRIS_BATCH_COUNT 2
+
+struct iris_bo_screen_deps {
+   struct iris_syncobj *write_syncobjs[IRIS_BATCH_COUNT];
+   struct iris_syncobj *read_syncobjs[IRIS_BATCH_COUNT];
+};
+
 struct iris_bo {
    /**
     * Size in bytes of the buffer object.
@@ -213,6 +222,10 @@ struct iris_bo {
 
    uint64_t last_seqnos[NUM_IRIS_DOMAINS] __attribute__ ((aligned (8)));
 
+   /** Up to one per screen, may need realloc. */
+   struct iris_bo_screen_deps *deps;
+   int deps_size;
+
    /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
@@ -346,10 +359,10 @@ iris_bo_is_external(const struct iris_bo *bo)
 void iris_bo_mark_exported(struct iris_bo *bo);
 
 /**
- * Returns 1 if mapping the buffer for write could cause the process
+ * Returns true if mapping the buffer for write could cause the process
  * to block, due to the object being active in the GPU.
  */
-int iris_bo_busy(struct iris_bo *bo);
+bool iris_bo_busy(struct iris_bo *bo);
 
 /**
  * Specify the volatility of the buffer.
@@ -451,4 +464,6 @@ enum iris_memory_zone iris_memzone_for_address(uint64_t address);
 
 int iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr);
 
+simple_mtx_t *iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr);
+
 #endif /* IRIS_BUFMGR_H */
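Note on the #error "Implement me." branch in update_bo_syncobjs(): only the IRIS_BATCH_COUNT == 2 case is implemented, where the single other batch index is simply 1 - batch_idx. As a standalone illustration (not part of the patch; the IRIS_BATCH_COUNT value below is assumed purely for the example), the per-count formulas suggested in that comment generalize to plain modular arithmetic that a future multi-batch implementation could loop over:

/* Standalone sketch: enumerate the "other" batch indices for an arbitrary
 * IRIS_BATCH_COUNT with modular arithmetic.  For a count of 2 this reduces
 * to the 1 - batch_idx trick used in the patch.
 */
#include <stdio.h>

#define IRIS_BATCH_COUNT 4   /* assumed value, for illustration only */

int main(void)
{
   for (int batch_idx = 0; batch_idx < IRIS_BATCH_COUNT; batch_idx++) {
      printf("batch %d -> other batches:", batch_idx);
      for (int i = 1; i < IRIS_BATCH_COUNT; i++)
         printf(" %d", (batch_idx + i) % IRIS_BATCH_COUNT);
      printf("\n");
   }
   return 0;
}

For IRIS_BATCH_COUNT == 4 this yields the same index sets as the (batch_idx + n) & 3 variant in the comment, since x & 3 equals x % 4 for non-negative x.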