diff --git a/.pick_status.json b/.pick_status.json
index 1d6c48692e1..3a623d99f68 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -256,7 +256,7 @@
         "description": "intel/perf: fix performance counters availability after glFinish",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/gallium/drivers/iris/iris_monitor.c b/src/gallium/drivers/iris/iris_monitor.c
index b615476b2cf..919aebca478 100644
--- a/src/gallium/drivers/iris/iris_monitor.c
+++ b/src/gallium/drivers/iris/iris_monitor.c
@@ -283,7 +283,7 @@ iris_get_monitor_result(struct pipe_context *ctx,
    assert(gen_perf_is_query_ready(perf_ctx, monitor->query, batch));
 
    unsigned bytes_written;
-   gen_perf_get_query_data(perf_ctx, monitor->query,
+   gen_perf_get_query_data(perf_ctx, monitor->query, batch,
                            monitor->result_size,
                            (unsigned*) monitor->result_buffer,
                            &bytes_written);
diff --git a/src/gallium/drivers/iris/iris_performance_query.c b/src/gallium/drivers/iris/iris_performance_query.c
index 1e8e1d9201a..0ef5c206fe3 100644
--- a/src/gallium/drivers/iris/iris_performance_query.c
+++ b/src/gallium/drivers/iris/iris_performance_query.c
@@ -214,7 +214,8 @@ iris_get_perf_query_data(struct pipe_context *pipe,
    struct gen_perf_query_object *obj = perf_query->query;
    struct gen_perf_context *perf_ctx = ice->perf_ctx;
 
-   gen_perf_get_query_data(perf_ctx, obj, data_size, data, bytes_written);
+   gen_perf_get_query_data(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER],
+         data_size, data, bytes_written);
 }
 
 void
diff --git a/src/intel/perf/gen_perf_query.c b/src/intel/perf/gen_perf_query.c
index 57b01203a6f..b9744913b16 100644
--- a/src/intel/perf/gen_perf_query.c
+++ b/src/intel/perf/gen_perf_query.c
@@ -1061,17 +1061,6 @@ gen_perf_wait_query(struct gen_perf_context *perf_ctx,
       perf_cfg->vtbl.batchbuffer_flush(perf_ctx->ctx, __FILE__, __LINE__);
 
    perf_cfg->vtbl.bo_wait_rendering(bo);
-
-   /* Due to a race condition between the OA unit signaling report
-    * availability and the report actually being written into memory,
-    * we need to wait for all the reports to come in before we can
-    * read them.
-    */
-   if (query->queryinfo->kind == GEN_PERF_QUERY_TYPE_OA ||
-       query->queryinfo->kind == GEN_PERF_QUERY_TYPE_RAW) {
-      while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
-         ;
-   }
 }
 
 bool
@@ -1087,8 +1076,8 @@ gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
       return (query->oa.results_accumulated ||
               (query->oa.bo &&
                !perf_cfg->vtbl.batch_references(current_batch, query->oa.bo) &&
-               !perf_cfg->vtbl.bo_busy(query->oa.bo) &&
-               read_oa_samples_for_query(perf_ctx, query, current_batch)));
+               !perf_cfg->vtbl.bo_busy(query->oa.bo)));
+
    case GEN_PERF_QUERY_TYPE_PIPELINE:
       return (query->pipeline_stats.bo &&
               !perf_cfg->vtbl.batch_references(current_batch, query->pipeline_stats.bo) &&
@@ -1513,6 +1502,7 @@ get_pipeline_stats_data(struct gen_perf_context *perf_ctx,
 void
 gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
                         struct gen_perf_query_object *query,
+                        void *current_batch,
                         int data_size,
                         unsigned *data,
                         unsigned *bytes_written)
@@ -1524,6 +1514,17 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
    case GEN_PERF_QUERY_TYPE_OA:
    case GEN_PERF_QUERY_TYPE_RAW:
       if (!query->oa.results_accumulated) {
+         /* Due to the sampling frequency of the OA buffer by the i915-perf
+          * driver, there can be a 5ms delay between Mesa seeing the query
+          * complete and i915 making all the OA buffer reports available to us.
+          * We need to wait for all the reports to come in before we can do
+          * the post-processing that removes unrelated deltas.
+          * There is an i915-perf series to address this issue, but it has
+          * not been merged upstream yet.
+          */
+         while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
+            ;
+
          read_gt_frequency(perf_ctx, query);
          uint32_t *begin_report = query->oa.map;
          uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
diff --git a/src/intel/perf/gen_perf_query.h b/src/intel/perf/gen_perf_query.h
index a0246501f76..d064a5d0669 100644
--- a/src/intel/perf/gen_perf_query.h
+++ b/src/intel/perf/gen_perf_query.h
@@ -76,6 +76,7 @@ void gen_perf_delete_query(struct gen_perf_context *perf_ctx,
                            struct gen_perf_query_object *query);
 void gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
                              struct gen_perf_query_object *query,
+                             void *current_batch,
                              int data_size,
                              unsigned *data,
                              unsigned *bytes_written);
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 6c74403da6c..042c236d976 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -323,7 +323,7 @@ brw_get_perf_query_data(struct gl_context *ctx,
     */
    assert(o->Ready);
 
-   gen_perf_get_query_data(brw->perf_ctx, obj,
+   gen_perf_get_query_data(brw->perf_ctx, obj, &brw->batch,
                            data_size, data, bytes_written);
 }