diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c
index cd25c0846b8..70df3e6bb83 100644
--- a/src/gallium/drivers/asahi/agx_batch.c
+++ b/src/gallium/drivers/asahi/agx_batch.c
@@ -51,6 +51,7 @@ agx_batch_init(struct agx_context *ctx,
 
    util_dynarray_init(&batch->scissor, ctx);
    util_dynarray_init(&batch->depth_bias, ctx);
+   util_dynarray_init(&batch->occlusion_queries, ctx);
 
    batch->clear = 0;
    batch->draw = 0;
@@ -85,6 +86,10 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch)
    if (ctx->batch == batch)
       ctx->batch = NULL;
 
+   agx_finish_batch_occlusion_queries(batch);
+   batch->occlusion_buffer.cpu = NULL;
+   batch->occlusion_buffer.gpu = 0;
+
    /* There is no more writer for anything we wrote recorded on this context */
    hash_table_foreach(ctx->writer, ent) {
       if (ent->data == batch)
@@ -102,6 +107,7 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch)
    util_dynarray_fini(&batch->scissor);
    util_dynarray_fini(&batch->depth_bias);
+   util_dynarray_fini(&batch->occlusion_queries);
 
    util_unreference_framebuffer_state(&batch->key);
 
    unsigned batch_idx = agx_batch_idx(batch);
@@ -281,3 +287,25 @@ agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc)
    assert(!_mesa_hash_table_search(ctx->writer, rsrc));
    _mesa_hash_table_insert(ctx->writer, rsrc, batch);
 }
+
+/*
+ * The OpenGL specification says that
+ *
+ *    It must always be true that if any query object returns a result
+ *    available of TRUE, all queries of the same type issued prior to that
+ *    query must also return TRUE.
+ *
+ * To implement this, we need to be able to flush all batches writing
+ * occlusion queries, to ensure ordering.
+ */
+void
+agx_flush_occlusion_queries(struct agx_context *ctx)
+{
+   unsigned i;
+   foreach_batch(ctx, i) {
+      struct agx_batch *other = &ctx->batches.slots[i];
+
+      if (other->occlusion_queries.size != 0)
+         agx_flush_batch_for_reason(ctx, other, "Occlusion query ordering");
+   }
+}
diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c
index c98e08b77a4..68a839b068b 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -1012,6 +1012,18 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
     */
    agx_batch_add_bo(batch, batch->encoder);
 
+   /* Occlusion queries are allocated as a contiguous pool */
+   unsigned oq_count = util_dynarray_num_elements(&batch->occlusion_queries,
+                                                  struct agx_query *);
+   size_t oq_size = oq_count * sizeof(uint64_t);
+
+   if (oq_size) {
+      batch->occlusion_buffer = agx_pool_alloc_aligned(&batch->pool, oq_size, 64);
+      memset(batch->occlusion_buffer.cpu, 0, oq_size);
+   } else {
+      batch->occlusion_buffer.gpu = 0;
+   }
+
    unsigned handle_count =
       agx_batch_num_bo(batch) +
       agx_pool_num_bos(&batch->pool) +
@@ -1044,6 +1056,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
                encoder_id,
                scissor,
                zbias,
+               batch->occlusion_buffer.gpu,
                pipeline_background,
                pipeline_background_partial,
                pipeline_store,
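
The CPU-side contract of this pool is small: one zeroed uint64_t slot per query the batch referenced, allocated only at submit time. The following standalone C sketch mirrors the allocate-and-zero step in agx_flush_batch above; the mock_* names are hypothetical stand-ins, not the driver's types:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical stand-in for the driver's batch/pool types */
    struct mock_batch {
       unsigned oq_count;    /* queries referenced by draws in this batch */
       uint64_t *oq_results; /* CPU view; the GPU writes the real buffer */
    };

    /* Mirrors agx_flush_batch: one 64-bit slot per query, 64-byte aligned,
     * zero-filled so untouched slots read back as 0. */
    static int mock_submit(struct mock_batch *batch)
    {
       size_t oq_size = batch->oq_count * sizeof(uint64_t);

       if (oq_size) {
          if (posix_memalign((void **)&batch->oq_results, 64, oq_size))
             return -1;
          memset(batch->oq_results, 0, oq_size);
       } else {
          batch->oq_results = NULL; /* like occlusion_buffer.gpu = 0 */
       }

       return 0;
    }

The memset is what makes a query whose draws never pass any samples read back a well-defined zero when the results are accumulated later.
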
diff --git a/src/gallium/drivers/asahi/agx_query.c b/src/gallium/drivers/asahi/agx_query.c
index b9137821ef2..831220103a6 100644
--- a/src/gallium/drivers/asahi/agx_query.c
+++ b/src/gallium/drivers/asahi/agx_query.c
@@ -10,6 +10,9 @@ agx_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
 {
    struct agx_query *query = calloc(1, sizeof(struct agx_query));
 
+   query->type = query_type;
+   query->index = index;
+
    return (struct pipe_query *)query;
 }
 
@@ -20,32 +23,157 @@ agx_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
 }
 
 static bool
-agx_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
 {
-   return true;
+   struct agx_context *ctx = agx_context(pctx);
+   struct agx_query *query = (struct agx_query *) pquery;
+
+   switch (query->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      ctx->occlusion_query = query;
+      ctx->dirty |= AGX_DIRTY_QUERY;
+
+      /* begin_query zeroes the value, so flush any batch that could still
+       * write it. If anything other than piglit actually hits this, we
+       * could shadow the query to avoid the flush.
+       */
+      if (query->writer)
+         agx_flush_batch_for_reason(ctx, query->writer, "Occlusion overwritten");
+
+      assert(query->writer == NULL);
+
+      query->value = 0;
+      return true;
+
+   default:
+      return false;
+   }
 }
 
 static bool
-agx_end_query(struct pipe_context *ctx, struct pipe_query *query)
+agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
 {
-   return true;
+   struct agx_context *ctx = agx_context(pctx);
+   struct agx_query *query = (struct agx_query *) pquery;
+
+   switch (query->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      ctx->occlusion_query = NULL;
+      ctx->dirty |= AGX_DIRTY_QUERY;
+      return true;
+
+   default:
+      return false;
+   }
 }
 
 static bool
-agx_get_query_result(struct pipe_context *ctx,
-                     struct pipe_query *query,
+agx_get_query_result(struct pipe_context *pctx,
+                     struct pipe_query *pquery,
                      bool wait,
                      union pipe_query_result *vresult)
 {
-   uint64_t *result = (uint64_t*)vresult;
+   struct agx_query *query = (struct agx_query *) pquery;
+   struct agx_context *ctx = agx_context(pctx);
 
-   *result = 0;
-   return true;
+   switch (query->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+      if (query->writer != NULL) {
+         assert(query->writer->occlusion_queries.size != 0);
+
+         /* Querying the result forces a query to finish in finite time, so
+          * we need to flush regardless. Furthermore, we need all earlier
+          * queries to finish before this query, so we flush all batches
+          * writing queries now. Yes, this sucks for tilers.
+          */
+         agx_flush_occlusion_queries(ctx);
+
+         /* TODO: Respect wait when we have real sync */
+      }
+
+      assert(query->writer == NULL && "cleared when cleaning up batch");
+
+      if (query->type == PIPE_QUERY_OCCLUSION_COUNTER)
+         vresult->u64 = query->value;
+      else
+         vresult->b = query->value;
+
+      return true;
+
+   default:
+      unreachable("Other queries not yet supported");
+   }
 }
 
 static void
 agx_set_active_query_state(struct pipe_context *pipe, bool enable)
 {
+   struct agx_context *ctx = agx_context(pipe);
+
+   ctx->active_queries = enable;
+   ctx->dirty |= AGX_DIRTY_QUERY;
+}
+
+uint16_t
+agx_get_oq_index(struct agx_batch *batch, struct agx_query *query)
+{
+   /* If written by another batch, flush it now. If this affects real apps,
+    * we could avoid this flush by merging query results.
+    */
+   if (query->writer && query->writer != batch) {
+      agx_flush_batch_for_reason(batch->ctx, query->writer,
+                                 "Multiple occlusion query writers");
+   }
+
+   /* Allocate if needed */
+   if (query->writer == NULL) {
+      query->writer = batch;
+      query->writer_index =
+         util_dynarray_num_elements(&batch->occlusion_queries,
+                                    struct agx_query *);
+
+      util_dynarray_append(&batch->occlusion_queries, struct agx_query *,
+                           query);
+   }
+
+   assert(query->writer == batch);
+   assert(*util_dynarray_element(&batch->occlusion_queries,
+                                 struct agx_query *,
+                                 query->writer_index) == query);
+
+   return query->writer_index;
+}
+
+void
+agx_finish_batch_occlusion_queries(struct agx_batch *batch)
+{
+   uint64_t *results = (uint64_t *) batch->occlusion_buffer.cpu;
+
+   util_dynarray_foreach(&batch->occlusion_queries, struct agx_query *, it) {
+      struct agx_query *query = *it;
+      assert(query->writer == batch);
+
+      /* Get the result for this batch. If results is NULL, it means that no
+       * draws actually enabled any occlusion queries, so there's no change.
+       */
+      if (results != NULL) {
+         uint64_t result = *(results++);
+
+         /* Accumulate with the previous result (e.g. in case we split a
+          * frame into multiple batches so an API-level query spans multiple
+          * batches).
+          */
+         if (query->type == PIPE_QUERY_OCCLUSION_COUNTER)
+            query->value += result;
+         else
+            query->value |= (!!result);
+      }
+
+      query->writer = NULL;
+      query->writer_index = 0;
+   }
 }
 
 void
@@ -57,4 +185,7 @@ agx_init_query_functions(struct pipe_context *pctx)
    pctx->end_query = agx_end_query;
    pctx->get_query_result = agx_get_query_result;
    pctx->set_active_query_state = agx_set_active_query_state;
+
+   /* By default queries are active */
+   agx_context(pctx)->active_queries = true;
 }
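
agx_get_oq_index above is a get-or-allocate pattern: the first draw in a batch that references a query appends it to the batch's occlusion_queries array and caches that dense index in the query; every later draw in the same batch reuses it. The same logic, reduced to a standalone sketch with hypothetical mock_* types (a fixed array stands in for util_dynarray, and the cross-batch flush is reduced to an assert):

    #include <assert.h>
    #include <stdint.h>

    struct mock_batch;

    struct mock_query {
       struct mock_batch *writer; /* batch that will write us, if any */
       uint16_t writer_index;     /* slot in the writer's result pool */
    };

    struct mock_batch {
       struct mock_query *queries[256]; /* stands in for util_dynarray */
       uint16_t count;
    };

    static uint16_t mock_get_oq_index(struct mock_batch *batch,
                                      struct mock_query *q)
    {
       /* The driver flushes q->writer here when it is a different batch;
        * this sketch simply forbids that case. */
       assert(q->writer == NULL || q->writer == batch);

       if (q->writer == NULL) {
          /* First reference in this batch: allocate the next dense slot */
          q->writer = batch;
          q->writer_index = batch->count;
          batch->queries[batch->count++] = q;
       }

       assert(batch->queries[q->writer_index] == q);
       return q->writer_index;
    }

Because the index is dense and batch-local, the submit-time pool can be sized exactly, and the hardware word only needs a small integer, hence the uint16_t.
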
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index adcfaaa7912..40e35540554 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -1698,7 +1698,6 @@ agx_batch_init_state(struct agx_batch *batch)
       .w_clamp = true,
       .varying_word_1 = true,
       .cull_2 = true,
-      .occlusion_query = true,
       .occlusion_query_2 = true,
       .output_unknown = true,
       .varying_word_2 = true,
@@ -1707,7 +1706,6 @@
    agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10;
    agx_ppp_push(&ppp, VARYING_1, cfg);
    agx_ppp_push(&ppp, CULL_2, cfg);
-   agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY, cfg);
    agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg);
    agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg);
    agx_ppp_push(&ppp, VARYING_2, cfg);
@@ -1833,7 +1831,7 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out,
       (is_points && IS_DIRTY(SPRITE_COORD_MODE));
 
    bool fragment_control_dirty = IS_DIRTY(ZS) || IS_DIRTY(RS) ||
-                                 IS_DIRTY(PRIM);
+                                 IS_DIRTY(PRIM) || IS_DIRTY(QUERY);
 
    bool fragment_face_dirty =
       IS_DIRTY(ZS) || IS_DIRTY(STENCIL_REF) || IS_DIRTY(RS);
@@ -1857,11 +1855,19 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out,
       .varying_word_0 = IS_DIRTY(VS_PROG),
       .cull = IS_DIRTY(RS),
       .fragment_shader = IS_DIRTY(FS) || varyings_dirty,
+      .occlusion_query = IS_DIRTY(QUERY),
       .output_size = IS_DIRTY(VS_PROG),
    });
 
    if (fragment_control_dirty) {
       agx_ppp_push(&ppp, FRAGMENT_CONTROL, cfg) {
+         if (ctx->active_queries && ctx->occlusion_query) {
+            if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
+               cfg.visibility_mode = AGX_VISIBILITY_MODE_COUNTING;
+            else
+               cfg.visibility_mode = AGX_VISIBILITY_MODE_BOOLEAN;
+         }
+
         cfg.stencil_test_enable = ctx->zs->base.stencil[0].enabled;
         cfg.two_sided_stencil = ctx->zs->base.stencil[1].enabled;
         cfg.depth_bias_enable = rast->base.offset_tri;
@@ -1954,6 +1960,16 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out,
       }
    }
 
+   if (IS_DIRTY(QUERY)) {
+      agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY, cfg) {
+         if (ctx->active_queries && ctx->occlusion_query) {
+            cfg.index = agx_get_oq_index(batch, ctx->occlusion_query);
+         } else {
+            cfg.index = 0;
+         }
+      }
+   }
+
    if (IS_DIRTY(VS_PROG)) {
       agx_ppp_push(&ppp, OUTPUT_SIZE, cfg)
          cfg.count = vs->info.varyings.vs.nr_index;
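
The hardware-facing side funnels through one idea: begin/end query only set AGX_DIRTY_QUERY, and agx_encode_state re-emits the two affected PPP words (visibility mode in FRAGMENT_CONTROL, slot index in FRAGMENT_OCCLUSION_QUERY) at the next draw. A self-contained sketch of that dirty-tracking shape, with hypothetical mock_* names and printf standing in for word emission:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MOCK_DIRTY_QUERY (1u << 13) /* cf. AGX_DIRTY_QUERY */

    struct mock_ctx {
       uint32_t dirty;
       bool active_queries;
       int occlusion_slot; /* -1 = no query bound */
    };

    static void mock_begin_query(struct mock_ctx *ctx, int slot)
    {
       ctx->occlusion_slot = slot;
       ctx->dirty |= MOCK_DIRTY_QUERY; /* re-emit query words at next draw */
    }

    static void mock_end_query(struct mock_ctx *ctx)
    {
       ctx->occlusion_slot = -1;
       ctx->dirty |= MOCK_DIRTY_QUERY;
    }

    static void mock_encode_draw(struct mock_ctx *ctx)
    {
       if (ctx->dirty & MOCK_DIRTY_QUERY) {
          bool on = ctx->active_queries && ctx->occlusion_slot >= 0;
          printf("FRAGMENT_OCCLUSION_QUERY index=%d\n",
                 on ? ctx->occlusion_slot : 0);
       }

       ctx->dirty = 0; /* emitted words now match bound state */
    }

    int main(void)
    {
       struct mock_ctx ctx = { .active_queries = true, .occlusion_slot = -1 };

       mock_begin_query(&ctx, 3);
       mock_encode_draw(&ctx); /* emits index=3 */
       mock_encode_draw(&ctx); /* emits nothing: not dirty */
       mock_end_query(&ctx);
       mock_encode_draw(&ctx); /* emits index=0 */
       return 0;
    }
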
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index 3495f9c98b0..78668951b76 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -127,6 +127,12 @@ struct agx_batch {
 
    /* Scissor and depth-bias descriptors, uploaded at GPU time */
    struct util_dynarray scissor, depth_bias;
+
+   /* Indexed occlusion queries within the occlusion buffer, and the
+    * occlusion buffer itself, which is allocated at submit time.
+    */
+   struct util_dynarray occlusion_queries;
+   struct agx_ptr occlusion_buffer;
 };
 
 struct agx_zsa {
@@ -186,6 +192,7 @@ enum agx_dirty {
    AGX_DIRTY_FS_PROG = BITFIELD_BIT(11),
    AGX_DIRTY_BLEND = BITFIELD_BIT(12),
+   AGX_DIRTY_QUERY = BITFIELD_BIT(13),
 };
 
 #define AGX_MAX_BATCHES (2)
 
@@ -228,6 +235,9 @@ struct agx_context {
    bool cond_cond;
    enum pipe_render_cond_flag cond_mode;
 
+   struct agx_query *occlusion_query;
+   bool active_queries;
+
    struct util_debug_callback debug;
    bool is_noop;
 
@@ -264,7 +274,19 @@ struct agx_rasterizer {
 };
 
 struct agx_query {
-   unsigned query;
+   unsigned type;
+   unsigned index;
+
+   /* Invariant for occlusion queries:
+    *
+    *    writer != NULL => writer->occlusion_queries[writer_index] == this
+    *    writer == NULL => no batch's occlusion_queries contains this query
+    */
+   struct agx_batch *writer;
+   unsigned writer_index;
+
+   /* For occlusion queries, the result accumulated on the CPU */
+   uint64_t value;
 };
 
 struct agx_sampler_state {
@@ -438,6 +460,7 @@ void agx_flush_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch
 void agx_flush_all(struct agx_context *ctx, const char *reason);
 void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
 void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc, const char *reason);
+void agx_flush_occlusion_queries(struct agx_context *ctx);
 
 /* Use these instead of batch_add_bo for proper resource tracking */
 void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc);
@@ -464,4 +487,11 @@ agx_batch_init_state(struct agx_batch *batch);
 uint64_t
 agx_build_meta(struct agx_batch *batch, bool store, bool partial_render);
 
+/* Query management */
+uint16_t
+agx_get_oq_index(struct agx_batch *batch, struct agx_query *query);
+
+void
+agx_finish_batch_occlusion_queries(struct agx_batch *batch);
+
 #endif
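
The invariant documented on struct agx_query reads naturally as executable pseudocode. Here it is as a runnable checker over hypothetical mock types, making both directions explicit:

    #include <assert.h>
    #include <stddef.h>

    struct mock_batch;

    struct mock_query {
       struct mock_batch *writer;
       unsigned writer_index;
    };

    struct mock_batch {
       struct mock_query **occlusion_queries; /* plain array for the sketch */
       unsigned count;
    };

    static void check_query_invariant(const struct mock_query *q,
                                      const struct mock_batch *batches,
                                      unsigned num_batches)
    {
       if (q->writer != NULL) {
          /* writer != NULL => writer->occlusion_queries[writer_index] == this */
          assert(q->writer_index < q->writer->count);
          assert(q->writer->occlusion_queries[q->writer_index] == q);
       } else {
          /* writer == NULL => no batch's occlusion_queries contains this query */
          for (unsigned i = 0; i < num_batches; ++i)
             for (unsigned j = 0; j < batches[i].count; ++j)
                assert(batches[i].occlusion_queries[j] != q);
       }
    }

agx_finish_batch_occlusion_queries restores the writer == NULL case for every query it touches, which is what makes the "cleared when cleaning up batch" assert in agx_get_query_result hold after a flush.
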
diff --git a/src/gallium/drivers/asahi/magic.c b/src/gallium/drivers/asahi/magic.c
index 1f8d9ec1564..9064d52431b 100644
--- a/src/gallium/drivers/asahi/magic.c
+++ b/src/gallium/drivers/asahi/magic.c
@@ -156,6 +156,7 @@ demo_cmdbuf(uint64_t *buf, size_t size,
             uint64_t encoder_id,
             uint64_t scissor_ptr,
             uint64_t depth_bias_ptr,
+            uint64_t occlusion_ptr,
             uint32_t pipeline_clear,
             uint32_t pipeline_load,
             uint32_t pipeline_store,
@@ -194,6 +195,7 @@
       cfg.store_pipeline = pipeline_store;
       cfg.scissor_array = scissor_ptr;
       cfg.depth_bias_array = depth_bias_ptr;
+      cfg.visibility_result_buffer = occlusion_ptr;
 
       if (framebuffer->zsbuf) {
          struct pipe_surface *zsbuf = framebuffer->zsbuf;
diff --git a/src/gallium/drivers/asahi/magic.h b/src/gallium/drivers/asahi/magic.h
index 0231afdc291..a1e98f2ddef 100644
--- a/src/gallium/drivers/asahi/magic.h
+++ b/src/gallium/drivers/asahi/magic.h
@@ -32,6 +32,7 @@ demo_cmdbuf(uint64_t *buf, size_t size,
             uint64_t encoder_id,
             uint64_t scissor_ptr,
             uint64_t depth_bias_ptr,
+            uint64_t occlusion_ptr,
             uint32_t pipeline_clear,
             uint32_t pipeline_load,
             uint32_t pipeline_store,
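
Taken together, a Gallium frontend drives all of the above through the standard pipe_context hooks that agx_init_query_functions installs. A sketch of the end-to-end flow, with error handling and the actual draws elided:

    #include "pipe/p_context.h"
    #include "pipe/p_defines.h"

    /* Counts samples that pass the depth/stencil test across some draws */
    static uint64_t count_visible_samples(struct pipe_context *pctx)
    {
       struct pipe_query *q =
          pctx->create_query(pctx, PIPE_QUERY_OCCLUSION_COUNTER, 0);
       union pipe_query_result result = {0};

       pctx->begin_query(pctx, q);  /* agx_begin_query: zero, mark dirty */
       /* ... draw calls; each batch picks up a slot via agx_get_oq_index ... */
       pctx->end_query(pctx, q);    /* agx_end_query: unbind, mark dirty */

       /* agx_get_query_result: flushes every batch writing occlusion
        * queries, then reads the CPU-accumulated value. */
       pctx->get_query_result(pctx, q, true /* wait */, &result);

       pctx->destroy_query(pctx, q);
       return result.u64;
    }
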