diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c
index cd25c0846b8..70df3e6bb83 100644
--- a/src/gallium/drivers/asahi/agx_batch.c
+++ b/src/gallium/drivers/asahi/agx_batch.c
@@ -51,6 +51,7 @@ agx_batch_init(struct agx_context *ctx,
 
    util_dynarray_init(&batch->scissor, ctx);
    util_dynarray_init(&batch->depth_bias, ctx);
+   util_dynarray_init(&batch->occlusion_queries, ctx);
 
    batch->clear = 0;
    batch->draw = 0;
@@ -85,6 +86,10 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch)
    if (ctx->batch == batch)
       ctx->batch = NULL;
 
+   agx_finish_batch_occlusion_queries(batch);
+   batch->occlusion_buffer.cpu = NULL;
+   batch->occlusion_buffer.gpu = 0;
+
    /* There is no more writer for anything we wrote recorded on this context */
    hash_table_foreach(ctx->writer, ent) {
       if (ent->data == batch)
@@ -102,6 +107,7 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch)
    util_dynarray_fini(&batch->scissor);
    util_dynarray_fini(&batch->depth_bias);
+   util_dynarray_fini(&batch->occlusion_queries);
 
    util_unreference_framebuffer_state(&batch->key);
 
    unsigned batch_idx = agx_batch_idx(batch);
@@ -281,3 +287,25 @@ agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc)
    assert(!_mesa_hash_table_search(ctx->writer, rsrc));
    _mesa_hash_table_insert(ctx->writer, rsrc, batch);
 }
+
+/*
+ * The OpenGL specification says that
+ *
+ *    It must always be true that if any query object returns a result
+ *    available of TRUE, all queries of the same type issued prior to that
+ *    query must also return TRUE.
+ *
+ * To implement this, we need to be able to flush all batches writing
+ * occlusion queries, to ensure ordering.
+ */
+void
+agx_flush_occlusion_queries(struct agx_context *ctx)
+{
+   unsigned i;
+   foreach_batch(ctx, i) {
+      struct agx_batch *other = &ctx->batches.slots[i];
+
+      if (other->occlusion_queries.size != 0)
+         agx_flush_batch_for_reason(ctx, other, "Occlusion query ordering");
+   }
+}
diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c
index c98e08b77a4..68a839b068b 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -1012,6 +1012,18 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
     */
    agx_batch_add_bo(batch, batch->encoder);
 
+   /* Occlusion queries are allocated as a contiguous pool */
+   unsigned oq_count = util_dynarray_num_elements(&batch->occlusion_queries,
+                                                  struct agx_query *);
+   size_t oq_size = oq_count * sizeof(uint64_t);
+
+   if (oq_size) {
+      batch->occlusion_buffer = agx_pool_alloc_aligned(&batch->pool, oq_size, 64);
+      memset(batch->occlusion_buffer.cpu, 0, oq_size);
+   } else {
+      batch->occlusion_buffer.gpu = 0;
+   }
+
    unsigned handle_count =
       agx_batch_num_bo(batch) +
       agx_pool_num_bos(&batch->pool) +
@@ -1044,6 +1056,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
                encoder_id,
                scissor,
                zbias,
+               batch->occlusion_buffer.gpu,
                pipeline_background,
                pipeline_background_partial,
                pipeline_store,
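
The CPU-side contract of this pool is small: one zeroed uint64_t slot per query the batch referenced, allocated only at submit time. The following standalone C sketch mirrors the allocate-and-zero step in agx_flush_batch above; the mock_* names are hypothetical stand-ins, not the driver's types:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical stand-in for the driver's batch/pool types */
    struct mock_batch {
       unsigned oq_count;    /* queries referenced by draws in this batch */
       uint64_t *oq_results; /* CPU view; the GPU writes the real buffer */
    };

    /* Mirrors agx_flush_batch: one 64-bit slot per query, 64-byte aligned,
     * zero-filled so untouched slots read back as 0. */
    static int mock_submit(struct mock_batch *batch)
    {
       size_t oq_size = batch->oq_count * sizeof(uint64_t);

       if (oq_size) {
          if (posix_memalign((void **)&batch->oq_results, 64, oq_size))
             return -1;
          memset(batch->oq_results, 0, oq_size);
       } else {
          batch->oq_results = NULL; /* like occlusion_buffer.gpu = 0 */
       }

       return 0;
    }

The memset is what makes a query whose draws never pass any samples read back a well-defined zero when the results are accumulated later.
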
diff --git a/src/gallium/drivers/asahi/agx_query.c b/src/gallium/drivers/asahi/agx_query.c
index b9137821ef2..831220103a6 100644
--- a/src/gallium/drivers/asahi/agx_query.c
+++ b/src/gallium/drivers/asahi/agx_query.c
@@ -10,6 +10,9 @@ agx_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
 {
    struct agx_query *query = calloc(1, sizeof(struct agx_query));
 
+   query->type = query_type;
+   query->index = index;
+
    return (struct pipe_query *)query;
 }
 
@@ -20,32 +23,157 @@ agx_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
 }
 
 static bool
-agx_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
 {
-   return true;
+   struct agx_context *ctx = agx_context(pctx);
+   struct agx_query *query = (struct agx_query *) pquery;
+
+   switch (query->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      ctx->occlusion_query = query;
+      ctx->dirty |= AGX_DIRTY_QUERY;
+
+      /* begin_query zeroes the value, so flush any batch that could still
+       * write it. If anything other than piglit actually hits this, we
+       * could shadow the query to avoid the flush.
+       */
+      if (query->writer)
+         agx_flush_batch_for_reason(ctx, query->writer, "Occlusion overwritten");
+
+      assert(query->writer == NULL);
+
+      query->value = 0;
+      return true;
+
+   default:
+      return false;
+   }
 }
 
 static bool
-agx_end_query(struct pipe_context *ctx, struct pipe_query *query)
+agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
 {
-   return true;
+   struct agx_context *ctx = agx_context(pctx);
+   struct agx_query *query = (struct agx_query *) pquery;
+
+   switch (query->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      ctx->occlusion_query = NULL;
+      ctx->dirty |= AGX_DIRTY_QUERY;
+      return true;
+
+   default:
+      return false;
+   }
 }
 
 static bool
-agx_get_query_result(struct pipe_context *ctx,
-                     struct pipe_query *query,
+agx_get_query_result(struct pipe_context *pctx,
+                     struct pipe_query *pquery,
                      bool wait,
                      union pipe_query_result *vresult)
 {
-   uint64_t *result = (uint64_t*)vresult;
+   struct agx_query *query = (struct agx_query *) pquery;
+   struct agx_context *ctx = agx_context(pctx);
 
-   *result = 0;
-   return true;
+   switch (query->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      if (query->writer != NULL) {
+         assert(query->writer->occlusion_queries.size != 0);
+
+         /* Querying the result forces a query to finish in finite time, so
+          * we need to flush regardless. Furthermore, we need all earlier
+          * queries to finish before this query, so we flush all batches
+          * writing queries now. Yes, this sucks for tilers.
+          */
+         agx_flush_occlusion_queries(ctx);
+
+         /* TODO: Respect wait when we have real sync */
+      }
+
+      assert(query->writer == NULL && "cleared when cleaning up batch");
+
+      if (query->type == PIPE_QUERY_OCCLUSION_COUNTER)
+         vresult->u64 = query->value;
+      else
+         vresult->b = query->value;
+
+      return true;
+
+   default:
+      unreachable("Other queries not yet supported");
+   }
 }
 
 static void
 agx_set_active_query_state(struct pipe_context *pipe, bool enable)
 {
+   struct agx_context *ctx = agx_context(pipe);
+
+   ctx->active_queries = enable;
+   ctx->dirty |= AGX_DIRTY_QUERY;
+}
+
+uint16_t
+agx_get_oq_index(struct agx_batch *batch, struct agx_query *query)
+{
+   /* If written by another batch, flush it now. If this affects real apps,
+    * we could avoid this flush by merging query results.
+    */
+   if (query->writer && query->writer != batch) {
+      agx_flush_batch_for_reason(batch->ctx, query->writer,
+                                 "Multiple occlusion query writers");
+   }
+
+   /* Allocate if needed */
+   if (query->writer == NULL) {
+      query->writer = batch;
+      query->writer_index =
+         util_dynarray_num_elements(&batch->occlusion_queries,
+                                    struct agx_query *);
+
+      util_dynarray_append(&batch->occlusion_queries, struct agx_query *,
+                           query);
+   }
+
+   assert(query->writer == batch);
+   assert(*util_dynarray_element(&batch->occlusion_queries,
+                                 struct agx_query *,
+                                 query->writer_index) == query);
+
+   return query->writer_index;
+}
+
+void
+agx_finish_batch_occlusion_queries(struct agx_batch *batch)
+{
+   uint64_t *results = (uint64_t *) batch->occlusion_buffer.cpu;
+
+   util_dynarray_foreach(&batch->occlusion_queries, struct agx_query *, it) {
+      struct agx_query *query = *it;
+      assert(query->writer == batch);
+
+      /* Get the result for this batch. If results is NULL, it means that no
+       * draws actually enabled any occlusion queries, so there's no change.
+       */
+      if (results != NULL) {
+         uint64_t result = *(results++);
+
+         /* Accumulate with the previous result (e.g. in case we split a
+          * frame into multiple batches so an API-level query spans multiple
+          * batches).
+          */
+         if (query->type == PIPE_QUERY_OCCLUSION_COUNTER)
+            query->value += result;
+         else
+            query->value |= (!!result);
+      }
+
+      query->writer = NULL;
+      query->writer_index = 0;
+   }
 }
 
 void
@@ -57,4 +185,7 @@ agx_init_query_functions(struct pipe_context *pctx)
    pctx->end_query = agx_end_query;
    pctx->get_query_result = agx_get_query_result;
    pctx->set_active_query_state = agx_set_active_query_state;
+
+   /* By default queries are active */
+   agx_context(pctx)->active_queries = true;
 }
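
agx_get_oq_index above is a get-or-allocate pattern: the first draw in a batch that references a query appends it to the batch's occlusion_queries array and caches that dense index in the query; every later draw in the same batch reuses it. The same logic, reduced to a standalone sketch with hypothetical mock_* types (a fixed array stands in for util_dynarray, and the cross-batch flush is reduced to an assert):

    #include <assert.h>
    #include <stdint.h>

    struct mock_batch;

    struct mock_query {
       struct mock_batch *writer; /* batch that will write us, if any */
       uint16_t writer_index;     /* slot in the writer's result pool */
    };

    struct mock_batch {
       struct mock_query *queries[256]; /* stands in for util_dynarray */
       uint16_t count;
    };

    static uint16_t mock_get_oq_index(struct mock_batch *batch,
                                      struct mock_query *q)
    {
       /* The driver flushes q->writer here when it is a different batch;
        * this sketch simply forbids that case. */
       assert(q->writer == NULL || q->writer == batch);

       if (q->writer == NULL) {
          /* First reference in this batch: allocate the next dense slot */
          q->writer = batch;
          q->writer_index = batch->count;
          batch->queries[batch->count++] = q;
       }

       assert(batch->queries[q->writer_index] == q);
       return q->writer_index;
    }

Because the index is dense and batch-local, the submit-time pool can be sized exactly, and the hardware word only needs a small integer, hence the uint16_t.
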
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index adcfaaa7912..40e35540554 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -1698,7 +1698,6 @@ agx_batch_init_state(struct agx_batch *batch)
       .w_clamp = true,
       .varying_word_1 = true,
       .cull_2 = true,
-      .occlusion_query = true,
       .occlusion_query_2 = true,
       .output_unknown = true,
       .varying_word_2 = true,
@@ -1707,7 +1706,6 @@
    agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10;
    agx_ppp_push(&ppp, VARYING_1, cfg);
    agx_ppp_push(&ppp, CULL_2, cfg);
-   agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY, cfg);
    agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg);
    agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg);
    agx_ppp_push(&ppp, VARYING_2, cfg);
@@ -1833,7 +1831,7 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out,
       (is_points && IS_DIRTY(SPRITE_COORD_MODE));
 
    bool fragment_control_dirty = IS_DIRTY(ZS) || IS_DIRTY(RS) ||
-                                 IS_DIRTY(PRIM);
+                                 IS_DIRTY(PRIM) || IS_DIRTY(QUERY);
 
    bool fragment_face_dirty =
       IS_DIRTY(ZS) || IS_DIRTY(STENCIL_REF) || IS_DIRTY(RS);
@@ -1857,11 +1855,19 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out,
       .varying_word_0 = IS_DIRTY(VS_PROG),
       .cull = IS_DIRTY(RS),
       .fragment_shader = IS_DIRTY(FS) || varyings_dirty,
+      .occlusion_query = IS_DIRTY(QUERY),
       .output_size = IS_DIRTY(VS_PROG),
    });
 
    if (fragment_control_dirty) {
       agx_ppp_push(&ppp, FRAGMENT_CONTROL, cfg) {
+         if (ctx->active_queries && ctx->occlusion_query) {
+            if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
+               cfg.visibility_mode = AGX_VISIBILITY_MODE_COUNTING;
+            else
+               cfg.visibility_mode = AGX_VISIBILITY_MODE_BOOLEAN;
+         }
+
         cfg.stencil_test_enable = ctx->zs->base.stencil[0].enabled;
         cfg.two_sided_stencil = ctx->zs->base.stencil[1].enabled;
         cfg.depth_bias_enable = rast->base.offset_tri;
@@ -1954,6 +1960,16 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out,
       }
    }
 
+   if (IS_DIRTY(QUERY)) {
+      agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY, cfg) {
+         if (ctx->active_queries && ctx->occlusion_query) {
+            cfg.index = agx_get_oq_index(batch, ctx->occlusion_query);
+         } else {
+            cfg.index = 0;
+         }
+      }
+   }
+
    if (IS_DIRTY(VS_PROG)) {
       agx_ppp_push(&ppp, OUTPUT_SIZE, cfg)
          cfg.count = vs->info.varyings.vs.nr_index;
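
The hardware-facing side funnels through one idea: begin/end query only set AGX_DIRTY_QUERY, and agx_encode_state re-emits the two affected PPP words (visibility mode in FRAGMENT_CONTROL, slot index in FRAGMENT_OCCLUSION_QUERY) at the next draw. A self-contained sketch of that dirty-tracking shape, with hypothetical mock_* names and printf standing in for word emission:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MOCK_DIRTY_QUERY (1u << 13) /* cf. AGX_DIRTY_QUERY */

    struct mock_ctx {
       uint32_t dirty;
       bool active_queries;
       int occlusion_slot; /* -1 = no query bound */
    };

    static void mock_begin_query(struct mock_ctx *ctx, int slot)
    {
       ctx->occlusion_slot = slot;
       ctx->dirty |= MOCK_DIRTY_QUERY; /* re-emit query words at next draw */
    }

    static void mock_end_query(struct mock_ctx *ctx)
    {
       ctx->occlusion_slot = -1;
       ctx->dirty |= MOCK_DIRTY_QUERY;
    }

    static void mock_encode_draw(struct mock_ctx *ctx)
    {
       if (ctx->dirty & MOCK_DIRTY_QUERY) {
          bool on = ctx->active_queries && ctx->occlusion_slot >= 0;
          printf("FRAGMENT_OCCLUSION_QUERY index=%d\n",
                 on ? ctx->occlusion_slot : 0);
       }

       ctx->dirty = 0; /* emitted words now match bound state */
    }

    int main(void)
    {
       struct mock_ctx ctx = { .active_queries = true, .occlusion_slot = -1 };

       mock_begin_query(&ctx, 3);
       mock_encode_draw(&ctx); /* emits index=3 */
       mock_encode_draw(&ctx); /* emits nothing: not dirty */
       mock_end_query(&ctx);
       mock_encode_draw(&ctx); /* emits index=0 */
       return 0;
    }
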
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index 3495f9c98b0..78668951b76 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -127,6 +127,12 @@ struct agx_batch {
 
    /* Scissor and depth-bias descriptors, uploaded at GPU time */
    struct util_dynarray scissor, depth_bias;
+
+   /* Indexed occlusion queries within the occlusion buffer, and the
+    * occlusion buffer itself, which is allocated at submit time.
+    */
+   struct util_dynarray occlusion_queries;
+   struct agx_ptr occlusion_buffer;
 };
 
 struct agx_zsa {
@@ -186,6 +192,7 @@ enum agx_dirty {
    AGX_DIRTY_FS_PROG = BITFIELD_BIT(11),
    AGX_DIRTY_BLEND = BITFIELD_BIT(12),
+   AGX_DIRTY_QUERY = BITFIELD_BIT(13),
 };
 
 #define AGX_MAX_BATCHES (2)
 
@@ -228,6 +235,9 @@ struct agx_context {
    bool cond_cond;
    enum pipe_render_cond_flag cond_mode;
 
+   struct agx_query *occlusion_query;
+   bool active_queries;
+
    struct util_debug_callback debug;
    bool is_noop;
 
@@ -264,7 +274,19 @@ struct agx_rasterizer {
 };
 
 struct agx_query {
-   unsigned query;
+   unsigned type;
+   unsigned index;
+
+   /* Invariant for occlusion queries:
+    *
+    *    writer != NULL => writer->occlusion_queries[writer_index] == this
+    *    writer == NULL => no batch's occlusion_queries contains this query
+    */
+   struct agx_batch *writer;
+   unsigned writer_index;
+
+   /* For occlusion queries, the result accumulated on the CPU */
+   uint64_t value;
 };
 
 struct agx_sampler_state {
@@ -438,6 +460,7 @@ void agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch
 void agx_flush_all(struct agx_context *ctx, const char *reason);
 void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
 void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
+void agx_flush_occlusion_queries(struct agx_context *ctx);
 
 /* Use these instead of batch_add_bo for proper resource tracking */
 void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc);
@@ -464,4 +487,11 @@ agx_batch_init_state(struct agx_batch *batch);
 uint64_t
 agx_build_meta(struct agx_batch *batch, bool store, bool partial_render);
 
+/* Query management */
+uint16_t
+agx_get_oq_index(struct agx_batch *batch, struct agx_query *query);
+
+void
+agx_finish_batch_occlusion_queries(struct agx_batch *batch);
+
 #endif
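
The invariant documented on struct agx_query reads naturally as executable pseudocode. Here it is as a runnable checker over hypothetical mock types, making both directions explicit:

    #include <assert.h>
    #include <stddef.h>

    struct mock_batch;

    struct mock_query {
       struct mock_batch *writer;
       unsigned writer_index;
    };

    struct mock_batch {
       struct mock_query **occlusion_queries; /* plain array for the sketch */
       unsigned count;
    };

    static void check_query_invariant(const struct mock_query *q,
                                      const struct mock_batch *batches,
                                      unsigned num_batches)
    {
       if (q->writer != NULL) {
          /* writer != NULL => writer->occlusion_queries[writer_index] == this */
          assert(q->writer_index < q->writer->count);
          assert(q->writer->occlusion_queries[q->writer_index] == q);
       } else {
          /* writer == NULL => no batch's occlusion_queries contains this query */
          for (unsigned i = 0; i < num_batches; ++i)
             for (unsigned j = 0; j < batches[i].count; ++j)
                assert(batches[i].occlusion_queries[j] != q);
       }
    }

agx_finish_batch_occlusion_queries restores the writer == NULL case for every query it touches, which is what makes the "cleared when cleaning up batch" assert in agx_get_query_result hold after a flush.
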
diff --git a/src/gallium/drivers/asahi/magic.c b/src/gallium/drivers/asahi/magic.c
index 1f8d9ec1564..9064d52431b 100644
--- a/src/gallium/drivers/asahi/magic.c
+++ b/src/gallium/drivers/asahi/magic.c
@@ -156,6 +156,7 @@ demo_cmdbuf(uint64_t *buf, size_t size,
             uint64_t encoder_id,
             uint64_t scissor_ptr,
             uint64_t depth_bias_ptr,
+            uint64_t occlusion_ptr,
             uint32_t pipeline_clear,
             uint32_t pipeline_load,
             uint32_t pipeline_store,
@@ -194,6 +195,7 @@
       cfg.store_pipeline = pipeline_store;
       cfg.scissor_array = scissor_ptr;
       cfg.depth_bias_array = depth_bias_ptr;
+      cfg.visibility_result_buffer = occlusion_ptr;
 
       if (framebuffer->zsbuf) {
          struct pipe_surface *zsbuf = framebuffer->zsbuf;
diff --git a/src/gallium/drivers/asahi/magic.h b/src/gallium/drivers/asahi/magic.h
index 0231afdc291..a1e98f2ddef 100644
--- a/src/gallium/drivers/asahi/magic.h
+++ b/src/gallium/drivers/asahi/magic.h
@@ -32,6 +32,7 @@ demo_cmdbuf(uint64_t *buf, size_t size,
             uint64_t encoder_id,
             uint64_t scissor_ptr,
             uint64_t depth_bias_ptr,
+            uint64_t occlusion_ptr,
             uint32_t pipeline_clear,
             uint32_t pipeline_load,
             uint32_t pipeline_store,
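
Taken together, a Gallium frontend drives all of the above through the standard pipe_context hooks that agx_init_query_functions installs. A sketch of the end-to-end flow, with error handling and the actual draws elided:

    #include "pipe/p_context.h"
    #include "pipe/p_defines.h"

    /* Counts samples that pass the depth/stencil test across some draws */
    static uint64_t count_visible_samples(struct pipe_context *pctx)
    {
       struct pipe_query *q =
          pctx->create_query(pctx, PIPE_QUERY_OCCLUSION_COUNTER, 0);
       union pipe_query_result result = {0};

       pctx->begin_query(pctx, q);  /* agx_begin_query: zero, mark dirty */
       /* ... draw calls; each batch picks up a slot via agx_get_oq_index ... */
       pctx->end_query(pctx, q);    /* agx_end_query: unbind, mark dirty */

       /* agx_get_query_result: flushes every batch writing occlusion
        * queries, then reads the CPU-accumulated value. */
       pctx->get_query_result(pctx, q, true /* wait */, &result);

       pctx->destroy_query(pctx, q);
       return result.u64;
    }
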