/*
 * Review notes (free-form preamble ahead of the first "diff --git" header).
 *
 * This is a git unified diff touching eight files under src/asahi and
 * src/gallium/drivers/asahi. What the hunks below show:
 *
 *  - agx_nir_lower_gs.c: add_counter() loses its "counter != 0" guard and
 *    always emits the load / iadd / store sequence. The per-buffer XFB
 *    update in agx_nir_create_pre_gs() no longer loads an offset pointer
 *    and calls add_counter(); it calls libagx_update_xfb_counter() with the
 *    geometry param buffer, the buffer index and the computed size.
 *  - geometry.cl: libagx_setup_xfb_buffer() is restructured as an if/else
 *    on p->xfb_offs_ptrs[i] and still returns 0 when that entry is null.
 *    New libagx_update_xfb_counter() adds `count` to *p->xfb_offs_ptrs[i]
 *    only when that entry is non-null.
 *  - hk_cmd_buffer.h: includes agx_abi.h; hk_pipeline_stat_addr() returns
 *    AGX_SCRATCH_PAGE_ADDRESS instead of 0 for a disabled query; new
 *    hk_stat_enabled() compares an address against that value.
 *  - hk_cmd_dispatch.c, hk_cmd_draw.c: tests that used a statistic address
 *    as a truth value now go through hk_stat_enabled().
 *    hk_upload_geometry_params() gains else-branches that set
 *    xfb_base_original[i] to AGX_ZERO_PAGE_ADDRESS with xfb_size[i] = 0, and
 *    point the disabled prims-generated counters, xfb_overflow[i] and
 *    xfb_any_overflow at AGX_SCRATCH_PAGE_ADDRESS.
 *  - hk_nir_lower_descriptors.c: lower_uvs_index() selects
 *    AGX_SCRATCH_PAGE_ADDRESS instead of 0 when the value is not present.
 *  - agx_query.c, agx_state.c: agx_get_query_address() returns
 *    AGX_SCRATCH_PAGE_ADDRESS instead of 0; agx_ia_update() and
 *    agx_draw_patches() assign it to c_prims / c_invs instead of 0;
 *    agx_batch_geometry_params() gains an else-branch pointing
 *    xfb_overflow[] and xfb_any_overflow at it.
 *
 * NOTE(review): presumably AGX_SCRATCH_PAGE_ADDRESS names a writable dummy
 * page, which is what would make the now-unconditional add_counter() safe
 * for disabled counters -- confirm against agx_abi.h.
 *
 * NOTE(review): the Vulkan side converts its "is this enabled" tests to
 * hk_stat_enabled(), but no equivalent helper or call-site change is shown
 * for users of agx_get_query_address(), c_prims or c_invs on the gallium
 * side. Confirm no remaining caller still tests those addresses against 0.
 *
 * NOTE(review): hk_cmd_draw.c, hk_nir_lower_descriptors.c and agx_state.c
 * start using AGX_* address constants without an include being added in
 * the hunks shown here -- confirm agx_abi.h already reaches them.
 *
 * NOTE(review): hunk headers declare multi-line hunks (for example
 * "@@ -78,14 +78,9 @@"), yet the hunk bodies below share physical lines.
 * If the file on disk really has this shape, git apply / patch will reject
 * it. Regenerate the patch from the tree instead of restoring line breaks
 * by hand: blank and wrapped context lines cannot be recovered reliably
 * from this text.
 */
diff --git a/src/asahi/lib/agx_nir_lower_gs.c b/src/asahi/lib/agx_nir_lower_gs.c index 67bf370a993..6a4dec6690f 100644 --- a/src/asahi/lib/agx_nir_lower_gs.c +++ b/src/asahi/lib/agx_nir_lower_gs.c @@ -78,14 +78,9 @@ store_geometry_param_offset(nir_builder *b, nir_def *def, uint32_t offset, static void add_counter(nir_builder *b, nir_def *counter, nir_def *increment) { - /* If the counter is NULL, the counter is disabled. Skip the update. */ - nir_if *nif = nir_push_if(b, nir_ine_imm(b, counter, 0)); - { - nir_def *old = nir_load_global(b, counter, 4, 1, 32); - nir_def *new_ = nir_iadd(b, old, increment); - nir_store_global(b, counter, 4, new_, nir_component_mask(1)); - } - nir_pop_if(b, nif); + nir_def *old = nir_load_global(b, counter, 4, 1, 32); + nir_def *new_ = nir_iadd(b, old, increment); + nir_store_global(b, counter, 4, new_, nir_component_mask(1)); } /* Helpers for lowering I/O to variables */ @@ -1055,9 +1050,9 @@ agx_nir_create_pre_gs(struct lower_gs_state *state, struct nir_xfb_info *xfb, uint32_t prim_stride_B = xfb->buffers[i].stride * vertices_per_prim; unsigned stream = xfb->buffer_to_stream[i]; - nir_def *off_ptr = load_geometry_param(b, xfb_offs_ptrs[i]); nir_def *size = nir_imul_imm(b, prims[stream], prim_stride_B); - add_counter(b, off_ptr, size); + libagx_update_xfb_counter(b, nir_load_geometry_param_buffer_agx(b), + nir_imm_int(b, i), size); } } diff --git a/src/asahi/libagx/geometry.cl b/src/asahi/libagx/geometry.cl index ac7eb0fe0ec..29b326abb79 100644 --- a/src/asahi/libagx/geometry.cl +++ b/src/asahi/libagx/geometry.cl @@ -544,13 +544,22 @@ libagx_unroll_restart(global struct agx_heap *heap, uint64_t index_buffer, uint libagx_setup_xfb_buffer(global struct agx_geometry_params *p, uint i) { - global uint *off_ptr = p->xfb_offs_ptrs[i]; - if (!off_ptr) + if (p->xfb_offs_ptrs[i]) { + uint off = *(p->xfb_offs_ptrs[i]); + p->xfb_base[i] = p->xfb_base_original[i] + off; + return off; + } else { return 0; + } +} - uint off = *off_ptr; - 
p->xfb_base[i] = p->xfb_base_original[i] + off; - return off; +void +libagx_update_xfb_counter(global struct agx_geometry_params *p, uint i, + uint count) +{ + if (p->xfb_offs_ptrs[i]) { + *(p->xfb_offs_ptrs[i]) += count; + } } void diff --git a/src/asahi/vulkan/hk_cmd_buffer.h b/src/asahi/vulkan/hk_cmd_buffer.h index a1050819f8c..7de4187f57d 100644 --- a/src/asahi/vulkan/hk_cmd_buffer.h +++ b/src/asahi/vulkan/hk_cmd_buffer.h @@ -10,6 +10,7 @@ #include "util/macros.h" #include "util/list.h" +#include "agx_abi.h" #include "agx_helpers.h" #include "agx_linker.h" #include "agx_pack.h" @@ -752,10 +753,16 @@ hk_pipeline_stat_addr(struct hk_cmd_buffer *cmd, return root->draw.pipeline_stats + (sizeof(uint64_t) * index); } else { /* Query disabled */ - return 0; + return AGX_SCRATCH_PAGE_ADDRESS; } } +static inline bool +hk_stat_enabled(uint64_t addr) +{ + return addr != AGX_SCRATCH_PAGE_ADDRESS; +} + void hk_cmd_buffer_begin_graphics(struct hk_cmd_buffer *cmd, const VkCommandBufferBeginInfo *pBeginInfo); void hk_cmd_buffer_begin_compute(struct hk_cmd_buffer *cmd, diff --git a/src/asahi/vulkan/hk_cmd_dispatch.c b/src/asahi/vulkan/hk_cmd_dispatch.c index 9866b3040e7..99096d4aa7f 100644 --- a/src/asahi/vulkan/hk_cmd_dispatch.c +++ b/src/asahi/vulkan/hk_cmd_dispatch.c @@ -102,7 +102,7 @@ dispatch(struct hk_cmd_buffer *cmd, struct agx_grid grid) uint64_t stat = hk_pipeline_stat_addr( cmd, VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT); - if (stat) { + if (hk_stat_enabled(stat)) { perf_debug(cmd, "CS invocation statistic"); uint64_t grid = cmd->state.cs.descriptors.root.cs.group_count_addr; diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c index 3ab18980a24..d81a50ec6b4 100644 --- a/src/asahi/vulkan/hk_cmd_draw.c +++ b/src/asahi/vulkan/hk_cmd_draw.c @@ -1145,6 +1145,11 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw) params.xfb_size[i] = gfx->xfb[i].range; params.xfb_offs_ptrs[i] = gfx->xfb_offsets + i * 
sizeof(uint32_t); } + } else { + for (unsigned i = 0; i < ARRAY_SIZE(gfx->xfb); ++i) { + params.xfb_base_original[i] = AGX_ZERO_PAGE_ADDRESS; + params.xfb_size[i] = 0; + } } for (unsigned i = 0; i < ARRAY_SIZE(gfx->xfb_query); ++i) { @@ -1153,9 +1158,18 @@ hk_upload_geometry_params(struct hk_cmd_buffer *cmd, struct agx_draw draw) if (q) { params.xfb_prims_generated_counter[i] = q; params.prims_generated_counter[i] = q + sizeof(uint64_t); + } else { + params.xfb_prims_generated_counter[i] = AGX_SCRATCH_PAGE_ADDRESS; + params.prims_generated_counter[i] = AGX_SCRATCH_PAGE_ADDRESS; } + + /* TODO: Optimize out? */ + params.xfb_overflow[i] = AGX_SCRATCH_PAGE_ADDRESS; } + /* TODO: Optimize out? */ + params.xfb_any_overflow = AGX_SCRATCH_PAGE_ADDRESS; + /* Calculate input primitive count for direct draws, and allocate the vertex * & count buffers. GPU calculates and allocates for indirect draws. */ @@ -1634,7 +1648,7 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs, grid_tess = agx_1d(patches * draw.b.count[1]); /* TCS invocation counter increments once per-patch */ - if (tcs_stat) { + if (hk_stat_enabled(tcs_stat)) { perf_debug(cmd, "Direct TCS statistic"); libagx_increment_statistic( cmd, agx_1d(1), AGX_BARRIER_ALL | AGX_PREGFX, tcs_stat, patches); @@ -2775,8 +2789,8 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs, samples_shaded = dyn->ms.rasterization_samples; struct hk_fast_link_key_fs key = { - .prolog.statistics = hk_pipeline_stat_addr( - cmd, VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT), + .prolog.statistics = hk_stat_enabled(hk_pipeline_stat_addr( + cmd, VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT)), .prolog.cull_distance_size = hw_vs->info.cull_distance_array_size, .prolog.api_sample_mask = has_sample_mask ? 
api_sample_mask : 0xff, @@ -3492,8 +3506,10 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_) uint64_t stat_c_prims = hk_pipeline_stat_addr( cmd, VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT); - bool ia_stats = stat_ia_verts || stat_ia_prims || stat_vs_inv || - stat_c_inv || stat_c_prims; + bool ia_stats = hk_stat_enabled(stat_ia_verts) || + hk_stat_enabled(stat_ia_prims) || + hk_stat_enabled(stat_vs_inv) || + hk_stat_enabled(stat_c_inv) || hk_stat_enabled(stat_c_prims); struct hk_device *dev = hk_cmd_buffer_device(cmd); hk_foreach_view(cmd) { diff --git a/src/asahi/vulkan/hk_nir_lower_descriptors.c b/src/asahi/vulkan/hk_nir_lower_descriptors.c index 43e10b83be8..c3d92c44f3c 100644 --- a/src/asahi/vulkan/hk_nir_lower_descriptors.c +++ b/src/asahi/vulkan/hk_nir_lower_descriptors.c @@ -497,7 +497,8 @@ lower_uvs_index(nir_builder *b, nir_intrinsic_instr *intrin, void *data) present = nir_iand(b, present, nir_ine_imm(b, api_gs, 0)); } - addr = nir_bcsel(b, present, addr, nir_imm_int64(b, 0)); + addr = nir_bcsel(b, present, addr, + nir_imm_int64(b, AGX_SCRATCH_PAGE_ADDRESS)); nir_def_rewrite_uses(&intrin->def, addr); return true; diff --git a/src/gallium/drivers/asahi/agx_query.c b/src/gallium/drivers/asahi/agx_query.c index 1326f4747f5..1c2803cf147 100644 --- a/src/gallium/drivers/asahi/agx_query.c +++ b/src/gallium/drivers/asahi/agx_query.c @@ -11,6 +11,7 @@ #include "util/ralloc.h" #include "util/u_dump.h" #include "util/u_inlines.h" +#include "agx_abi.h" #include "agx_bo.h" #include "agx_device.h" #include "agx_state.h" @@ -567,7 +568,7 @@ agx_get_query_address(struct agx_batch *batch, struct agx_query *query) agx_add_query_to_batch(batch, query); return query->ptr.gpu; } else { - return 0; + return AGX_SCRATCH_PAGE_ADDRESS; } } diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index f2053c23b5b..e9b2175b604 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ 
b/src/gallium/drivers/asahi/agx_state.c @@ -3870,14 +3870,14 @@ agx_ia_update(struct agx_batch *batch, const struct pipe_draw_info *info, /* With a geometry/tessellation shader, clipper counters are written by the * pre-GS/tess prefix sum kernel since they depend on the output on the - * geometry/tessellation shader. Without a geometry/tessellation shader, + * geometry/tessellation shader. Without a geometry/tessellation shader, * they are written along with IA. */ if (ctx->stage[PIPE_SHADER_GEOMETRY].shader || ctx->stage[PIPE_SHADER_TESS_EVAL].shader) { - c_prims = 0; - c_invs = 0; + c_prims = AGX_SCRATCH_PAGE_ADDRESS; + c_invs = AGX_SCRATCH_PAGE_ADDRESS; } if (info->primitive_restart) { @@ -3983,6 +3983,12 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer, params.xfb_any_overflow = agx_get_query_address(batch, batch->ctx->tf_any_overflow); + } else { + for (unsigned i = 0; i < ARRAY_SIZE(batch->ctx->tf_overflow); ++i) { + params.xfb_overflow[i] = AGX_SCRATCH_PAGE_ADDRESS; + } + + params.xfb_any_overflow = AGX_SCRATCH_PAGE_ADDRESS; } /* Calculate input primitive count for direct draws, and allocate the vertex @@ -4711,8 +4717,8 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info, * Otherwise, we do when tessellating. */ if (ctx->stage[PIPE_SHADER_GEOMETRY].shader) { - c_prims = 0; - c_invs = 0; + c_prims = AGX_SCRATCH_PAGE_ADDRESS; + c_invs = AGX_SCRATCH_PAGE_ADDRESS; } /* Generate counts, then prefix sum them, then finally tessellate. */