diff --git a/.pick_status.json b/.pick_status.json index f0c86acbb41..fbfc0647364 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -144,7 +144,7 @@ "description": "panfrost: Take tiler memory budget into account in pan_select_tiler_hierarchy_mask", "nominated": true, "nomination_type": 4, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/gallium/drivers/panfrost/pan_csf.c b/src/gallium/drivers/panfrost/pan_csf.c index 6c7345a2663..f2633b157d5 100644 --- a/src/gallium/drivers/panfrost/pan_csf.c +++ b/src/gallium/drivers/panfrost/pan_csf.c @@ -687,23 +687,16 @@ csf_emit_tiler_desc(struct panfrost_batch *batch, const struct pan_fb_info *fb) { struct panfrost_context *ctx = batch->ctx; struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_screen *screen = pan_screen(ctx->base.screen); if (!batch->csf.pending_tiler_desc) return; + /* The tiler chunk starts with a header of 64 bytes */ pan_pack(batch->csf.pending_tiler_desc, TILER_CONTEXT, tiler) { - tiler.hierarchy_mask = - pan_select_tiler_hierarchy_mask(batch->key.width, - batch->key.height, - dev->tiler_features.max_levels); - - /* Disable hierarchies falling under the effective tile size. 
*/ - uint32_t disable_hierarchies; - for (disable_hierarchies = 0; - fb->tile_size > (16 * 16) << (disable_hierarchies * 2); - disable_hierarchies++) - ; - tiler.hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies); + tiler.hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)( + batch->key.width, batch->key.height, dev->tiler_features.max_levels, + fb->tile_size, screen->csf_tiler_heap.chunk_size - 64); #if PAN_ARCH >= 12 tiler.effective_tile_size = fb->tile_size; diff --git a/src/gallium/drivers/panfrost/pan_jm.c b/src/gallium/drivers/panfrost/pan_jm.c index 0babf81e479..1b49e08e997 100644 --- a/src/gallium/drivers/panfrost/pan_jm.c +++ b/src/gallium/drivers/panfrost/pan_jm.c @@ -426,10 +426,10 @@ jm_emit_tiler_desc(struct panfrost_batch *batch) t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT); pan_cast_and_pack(t.cpu, TILER_CONTEXT, tiler) { - tiler.hierarchy_mask = - pan_select_tiler_hierarchy_mask(batch->key.width, - batch->key.height, - dev->tiler_features.max_levels); + /* On JM, we don't care about passing the tile_size as it only matters for v12+ */ + tiler.hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)( + batch->key.width, batch->key.height, dev->tiler_features.max_levels, 0, + panfrost_bo_size(dev->tiler_heap)); tiler.fb_width = batch->key.width; tiler.fb_height = batch->key.height; diff --git a/src/panfrost/ci/panfrost-g720-fails.txt b/src/panfrost/ci/panfrost-g720-fails.txt index 195f7900dd7..b53fd5d6293 100644 --- a/src/panfrost/ci/panfrost-g720-fails.txt +++ b/src/panfrost/ci/panfrost-g720-fails.txt @@ -9,7 +9,3 @@ dEQP-VK.glsl.loops.special.do_while_dynamic_iterations.dowhile_trap_vertex,Crash # Seems to be a precision issues because of floor fp16 being dropped since v11 (and the conversion done as a result) dEQP-GLES3.functional.shaders.builtin_functions.common.fract.vec2_lowp_vertex,Fail dEQP-GLES31.functional.shaders.builtin_functions.common.fract.vec2_lowp_compute,Fail - -# Issue with color / depth internal buffer overrun 
-dEQP-GLES31.functional.fbo.no_attachments.maximums.all,Crash -dEQP-GLES31.functional.fbo.no_attachments.maximums.size,Crash diff --git a/src/panfrost/ci/panfrost-g725-fails.txt b/src/panfrost/ci/panfrost-g725-fails.txt index 0b40e0a1a2c..1f197a9a466 100644 --- a/src/panfrost/ci/panfrost-g725-fails.txt +++ b/src/panfrost/ci/panfrost-g725-fails.txt @@ -7,7 +7,3 @@ dEQP-VK.pipeline.fast_linked_library.misc.interpolate_at_sample_no_sample_shadin # Seems to be a precision issues because of floor fp16 being dropped since v11 (and the conversion done as a result) dEQP-GLES3.functional.shaders.builtin_functions.common.fract.vec2_lowp_vertex,Fail dEQP-GLES31.functional.shaders.builtin_functions.common.fract.vec2_lowp_compute,Fail - -# Issue with color / depth internal buffer overrun -dEQP-GLES31.functional.fbo.no_attachments.maximums.all,Crash -dEQP-GLES31.functional.fbo.no_attachments.maximums.size,Crash diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index b1a13af00fe..79b05e99852 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -34,6 +34,8 @@ #include "pan_props.h" #include "pan_texture.h" +#define PAN_BIN_LEVEL_COUNT 12 + static unsigned mod_to_block_fmt(uint64_t mod) { @@ -1160,3 +1162,103 @@ GENX(pan_emit_fragment_job_payload)(const struct pan_fb_info *fb, uint64_t fbd, } } #endif + +#if PAN_ARCH >= 6 +static uint32_t +pan_calc_bins_pointer_size(uint32_t width, uint32_t height, uint32_t tile_size, + uint32_t hierarchy_mask) +{ + const uint32_t bin_ptr_size = PAN_ARCH >= 12 ? 
16 : 8; + + uint32_t bins_x[PAN_BIN_LEVEL_COUNT]; + uint32_t bins_y[PAN_BIN_LEVEL_COUNT]; + uint32_t bins[PAN_BIN_LEVEL_COUNT]; + uint32_t bins_enabled; + + /* On v12+, hierarchy_mask is only used if 4 levels are used at most, + * otherwise it selects another mask (0xAC with a tile_size greater than + * 32x32, 0xAA with 32x32 and lower) */ + if ((hierarchy_mask == 0 || util_bitcount(hierarchy_mask) > 4) && + PAN_ARCH >= 12) { + if (tile_size > 32 * 32) + hierarchy_mask = 0xAC; + else + hierarchy_mask = 0xAA; + } + + bins_x[0] = DIV_ROUND_UP(width, 16); + bins_y[0] = DIV_ROUND_UP(height, 16); + bins[0] = bins_x[0] * bins_y[0]; + + for (uint32_t i = 1; i < ARRAY_SIZE(bins); i++) { + bins_x[i] = DIV_ROUND_UP(bins_x[i - 1], 2); + bins_y[i] = DIV_ROUND_UP(bins_y[i - 1], 2); + bins[i] = bins_x[i] * bins_y[i]; + } + + bins_enabled = 0; + for (uint32_t i = 0; i < ARRAY_SIZE(bins); i++) { + if ((hierarchy_mask & (1 << i)) != 0) + bins_enabled += bins[i]; + } + + return DIV_ROUND_UP(bins_enabled, 8) * 8 * bin_ptr_size; +} + +unsigned +GENX(pan_select_tiler_hierarchy_mask)(unsigned width, unsigned height, + unsigned max_levels, unsigned tile_size, + unsigned mem_budget) +{ + /* On v12+, the hierarchy_mask is deprecated and letting the hardware decide + * is preferred. We attempt to use hierarchy_mask of 0 in case the bins can + * fit in our memory budget. + */ + if (PAN_ARCH >= 12 && + pan_calc_bins_pointer_size(width, height, tile_size, 0) <= mem_budget) + return 0; + + uint32_t max_fb_wh = MAX2(width, height); + uint32_t last_hierarchy_bit = util_last_bit(DIV_ROUND_UP(max_fb_wh, 16)); + uint32_t hierarchy_mask = BITFIELD_MASK(max_levels); + + /* Always enable the level covering the whole FB, and disable the finest + * levels if we don't have enough to cover everything. + * This is suboptimal for small primitives, since it might force + * primitives to be walked multiple times even if they don't cover + * the tile being processed. 
On the other hand, it's hard to guess + * the draw pattern, so it's probably good enough for now. + */ + if (last_hierarchy_bit > max_levels) + hierarchy_mask <<= last_hierarchy_bit - max_levels; + + /* Disable hierarchies falling under the effective tile size. */ + uint32_t disable_hierarchies; + for (disable_hierarchies = 0; + tile_size > (16 * 16) << (disable_hierarchies * 2); + disable_hierarchies++) + ; + hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies); + + /* Disable hierarchies until the bins fit in our budget */ + while (disable_hierarchies < PAN_BIN_LEVEL_COUNT) { + uint32_t bins_ptr_size = + pan_calc_bins_pointer_size(width, height, tile_size, hierarchy_mask); + + if (bins_ptr_size < mem_budget) + break; + + disable_hierarchies++; + hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies); + } + + /* We should fit in our budget at this point */ + assert(pan_calc_bins_pointer_size(width, height, tile_size, + hierarchy_mask) <= mem_budget); + + /* Before v12, at least one hierarchy level must be enabled. 
*/ + assert(hierarchy_mask != 0 || PAN_ARCH >= 12); + + return hierarchy_mask; +} +#endif diff --git a/src/panfrost/lib/pan_desc.h b/src/panfrost/lib/pan_desc.h index 3b91e8ae6d5..0072a6d0bf2 100644 --- a/src/panfrost/lib/pan_desc.h +++ b/src/panfrost/lib/pan_desc.h @@ -198,6 +198,13 @@ unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx, const struct pan_tiler_context *tiler_ctx, void *out); +#if PAN_ARCH >= 6 +unsigned GENX(pan_select_tiler_hierarchy_mask)(uint32_t width, uint32_t height, + uint32_t max_levels, + uint32_t tile_size, + uint32_t mem_budget); +#endif + #if PAN_ARCH <= 9 void GENX(pan_emit_fragment_job_payload)(const struct pan_fb_info *fb, uint64_t fbd, void *out); diff --git a/src/panfrost/lib/pan_util.h b/src/panfrost/lib/pan_util.h index a4b8deb37d1..8b0deb7f100 100644 --- a/src/panfrost/lib/pan_util.h +++ b/src/panfrost/lib/pan_util.h @@ -79,25 +79,4 @@ panfrost_last_nonnull(uint64_t *ptrs, unsigned count) return 0; } -static inline uint32_t -pan_select_tiler_hierarchy_mask(unsigned width, unsigned height, - unsigned max_levels) -{ - uint32_t max_fb_wh = MAX2(width, height); - uint32_t last_hierarchy_bit = util_last_bit(DIV_ROUND_UP(max_fb_wh, 16)); - uint32_t hierarchy_mask = BITFIELD_MASK(max_levels); - - /* Always enable the level covering the whole FB, and disable the finest - * levels if we don't have enough to cover everything. - * This is suboptimal for small primitives, since it might force - * primitives to be walked multiple times even if they don't cover the - * the tile being processed. On the other hand, it's hard to guess - * the draw pattern, so it's probably good enough for now. 
- */ - if (last_hierarchy_bit > max_levels) - hierarchy_mask <<= last_hierarchy_bit - max_levels; - - return hierarchy_mask; -} - #endif /* PAN_UTIL_H */ diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index ff45e337a15..62306ab9a40 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -820,8 +820,9 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf) unsigned max_levels = tiler_features.max_levels; assert(max_levels >= 2); - cfg.hierarchy_mask = - panvk_select_tiler_hierarchy_mask(phys_dev, &cmdbuf->state.gfx); + /* The tiler chunk starts with a header of 64 bytes */ + cfg.hierarchy_mask = panvk_select_tiler_hierarchy_mask( + phys_dev, &cmdbuf->state.gfx, phys_dev->csf.tiler.chunk_size - 64); cfg.fb_width = fbinfo->width; cfg.fb_height = fbinfo->height; diff --git a/src/panfrost/vulkan/csf/panvk_vX_queue.c b/src/panfrost/vulkan/csf/panvk_vX_queue.c index 4e89734d54a..7d7007fa57f 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_queue.c +++ b/src/panfrost/vulkan/csf/panvk_vX_queue.c @@ -641,6 +641,8 @@ static VkResult init_tiler(struct panvk_queue *queue) { struct panvk_device *dev = to_panvk_device(queue->vk.base.device); + const struct panvk_physical_device *phys_dev = + to_panvk_physical_device(dev->vk.physical); struct panvk_tiler_heap *tiler_heap = &queue->tiler_heap; VkResult result; @@ -659,13 +661,13 @@ init_tiler(struct panvk_queue *queue) goto err_free_desc; } - tiler_heap->chunk_size = 2 * 1024 * 1024; + tiler_heap->chunk_size = phys_dev->csf.tiler.chunk_size; struct drm_panthor_tiler_heap_create thc = { .vm_id = pan_kmod_vm_handle(dev->kmod.vm), .chunk_size = tiler_heap->chunk_size, - .initial_chunk_count = 5, - .max_chunks = 64, + .initial_chunk_count = phys_dev->csf.tiler.initial_chunks, + .max_chunks = phys_dev->csf.tiler.max_chunks, .target_in_flight = 65535, }; diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c 
b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c index 8d4e662cc94..d521f88d79a 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_buffer.c @@ -255,8 +255,8 @@ panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf, } pan_pack(&batch->tiler.ctx_templ, TILER_CONTEXT, cfg) { - cfg.hierarchy_mask = - panvk_select_tiler_hierarchy_mask(phys_dev, &cmdbuf->state.gfx); + cfg.hierarchy_mask = panvk_select_tiler_hierarchy_mask( + phys_dev, &cmdbuf->state.gfx, pan_kmod_bo_size(dev->tiler_heap->bo)); cfg.fb_width = fbinfo->width; cfg.fb_height = fbinfo->height; cfg.heap = batch->tiler.heap_desc.gpu; diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h index c8d30a206b2..aae044e48f0 100644 --- a/src/panfrost/vulkan/panvk_cmd_draw.h +++ b/src/panfrost/vulkan/panvk_cmd_draw.h @@ -200,23 +200,16 @@ struct panvk_cmd_graphics_state { static inline uint32_t panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev, - const struct panvk_cmd_graphics_state *state) + const struct panvk_cmd_graphics_state *state, + unsigned bin_ptr_mem_budget) { struct panfrost_tiler_features tiler_features = panfrost_query_tiler_features(&phys_dev->kmod.props); - uint32_t hierarchy_mask = - pan_select_tiler_hierarchy_mask(state->render.fb.info.width, - state->render.fb.info.height, - tiler_features.max_levels); - - /* Disable hierarchies falling under the effective tile size. 
*/ - uint32_t disable_hierarchies; - for (disable_hierarchies = 0; state->render.fb.info.tile_size > - (16 * 16) << (disable_hierarchies * 2); - disable_hierarchies++) - ; - hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies); + uint32_t hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)( + state->render.fb.info.width, state->render.fb.info.height, + tiler_features.max_levels, state->render.fb.info.tile_size, + bin_ptr_mem_budget); return hierarchy_mask; } diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c index 968180a9994..b599dc2a8f5 100644 --- a/src/panfrost/vulkan/panvk_physical_device.c +++ b/src/panfrost/vulkan/panvk_physical_device.c @@ -1140,6 +1140,13 @@ panvk_physical_device_init(struct panvk_physical_device *device, if (result != VK_SUCCESS) goto fail; + if (arch >= 10) { + /* XXX: Make dri options for those */ + device->csf.tiler.chunk_size = 2 * 1024 * 1024; + device->csf.tiler.initial_chunks = 5; + device->csf.tiler.max_chunks = 64; + } + if (arch != 10) vk_warn_non_conformant_implementation("panvk"); diff --git a/src/panfrost/vulkan/panvk_physical_device.h b/src/panfrost/vulkan/panvk_physical_device.h index ebe81284b3e..cc5694a2138 100644 --- a/src/panfrost/vulkan/panvk_physical_device.h +++ b/src/panfrost/vulkan/panvk_physical_device.h @@ -34,6 +34,16 @@ struct panvk_physical_device { const struct panfrost_model *model; + union { + struct { + struct { + uint32_t chunk_size; + uint32_t initial_chunks; + uint32_t max_chunks; + } tiler; + } csf; + }; + struct { dev_t primary_rdev; dev_t render_rdev;