mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 08:40:11 +01:00
panfrost: Take tiler memory budget into account in pan_select_tiler_hierarchy_mask
On v12+, the hardware reports support for 8 levels but
effectively only supports up to 4 levels.
In case more than 4 levels are used, it will default to 0xAA when
tile_size is 32x32 or lower, otherwise 0xAC when the tile_size is greater than 32x32.
This patch ensures that the bins fit inside our tiler budget,
and otherwise drops levels until they fit.
This also allows the hardware to decide the hierarchy on v12+
if we know it will fit.
This fixes "dEQP-GLES31.functional.fbo.no_attachments.maximums.all" and
"dEQP-GLES31.functional.fbo.no_attachments.maximums.size" on v12+ but
also likely more if we were exhausting the memory budget.
Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Backport-to: 25.1
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34559>
(cherry picked from commit 92afeb37bf)
This commit is contained in:
parent
5bae75e3a0
commit
066850bb3a
14 changed files with 152 additions and 66 deletions
|
|
@ -144,7 +144,7 @@
|
|||
"description": "panfrost: Take tiler memory budget into account in pan_select_tiler_hierarchy_mask",
|
||||
"nominated": true,
|
||||
"nomination_type": 4,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -687,23 +687,16 @@ csf_emit_tiler_desc(struct panfrost_batch *batch, const struct pan_fb_info *fb)
|
|||
{
|
||||
struct panfrost_context *ctx = batch->ctx;
|
||||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||
struct panfrost_screen *screen = pan_screen(ctx->base.screen);
|
||||
|
||||
if (!batch->csf.pending_tiler_desc)
|
||||
return;
|
||||
|
||||
/* The tiler chunk starts with a header of 64 bytes */
|
||||
pan_pack(batch->csf.pending_tiler_desc, TILER_CONTEXT, tiler) {
|
||||
tiler.hierarchy_mask =
|
||||
pan_select_tiler_hierarchy_mask(batch->key.width,
|
||||
batch->key.height,
|
||||
dev->tiler_features.max_levels);
|
||||
|
||||
/* Disable hierarchies falling under the effective tile size. */
|
||||
uint32_t disable_hierarchies;
|
||||
for (disable_hierarchies = 0;
|
||||
fb->tile_size > (16 * 16) << (disable_hierarchies * 2);
|
||||
disable_hierarchies++)
|
||||
;
|
||||
tiler.hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies);
|
||||
tiler.hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)(
|
||||
batch->key.width, batch->key.height, dev->tiler_features.max_levels,
|
||||
fb->tile_size, screen->csf_tiler_heap.chunk_size - 64);
|
||||
|
||||
#if PAN_ARCH >= 12
|
||||
tiler.effective_tile_size = fb->tile_size;
|
||||
|
|
|
|||
|
|
@ -426,10 +426,10 @@ jm_emit_tiler_desc(struct panfrost_batch *batch)
|
|||
|
||||
t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT);
|
||||
pan_cast_and_pack(t.cpu, TILER_CONTEXT, tiler) {
|
||||
tiler.hierarchy_mask =
|
||||
pan_select_tiler_hierarchy_mask(batch->key.width,
|
||||
batch->key.height,
|
||||
dev->tiler_features.max_levels);
|
||||
/* On JM, the tile_size we pass does not matter, as it is only used on v12+ */
|
||||
tiler.hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)(
|
||||
batch->key.width, batch->key.height, dev->tiler_features.max_levels, 0,
|
||||
panfrost_bo_size(dev->tiler_heap));
|
||||
|
||||
tiler.fb_width = batch->key.width;
|
||||
tiler.fb_height = batch->key.height;
|
||||
|
|
|
|||
|
|
@ -9,7 +9,3 @@ dEQP-VK.glsl.loops.special.do_while_dynamic_iterations.dowhile_trap_vertex,Crash
|
|||
# Seems to be a precision issue because of floor fp16 being dropped since v11 (and the conversion done as a result)
|
||||
dEQP-GLES3.functional.shaders.builtin_functions.common.fract.vec2_lowp_vertex,Fail
|
||||
dEQP-GLES31.functional.shaders.builtin_functions.common.fract.vec2_lowp_compute,Fail
|
||||
|
||||
# Issue with color / depth internal buffer overrun
|
||||
dEQP-GLES31.functional.fbo.no_attachments.maximums.all,Crash
|
||||
dEQP-GLES31.functional.fbo.no_attachments.maximums.size,Crash
|
||||
|
|
|
|||
|
|
@ -7,7 +7,3 @@ dEQP-VK.pipeline.fast_linked_library.misc.interpolate_at_sample_no_sample_shadin
|
|||
# Seems to be a precision issue because of floor fp16 being dropped since v11 (and the conversion done as a result)
|
||||
dEQP-GLES3.functional.shaders.builtin_functions.common.fract.vec2_lowp_vertex,Fail
|
||||
dEQP-GLES31.functional.shaders.builtin_functions.common.fract.vec2_lowp_compute,Fail
|
||||
|
||||
# Issue with color / depth internal buffer overrun
|
||||
dEQP-GLES31.functional.fbo.no_attachments.maximums.all,Crash
|
||||
dEQP-GLES31.functional.fbo.no_attachments.maximums.size,Crash
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@
|
|||
#include "pan_props.h"
|
||||
#include "pan_texture.h"
|
||||
|
||||
#define PAN_BIN_LEVEL_COUNT 12
|
||||
|
||||
static unsigned
|
||||
mod_to_block_fmt(uint64_t mod)
|
||||
{
|
||||
|
|
@ -1160,3 +1162,103 @@ GENX(pan_emit_fragment_job_payload)(const struct pan_fb_info *fb, uint64_t fbd,
|
|||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
static uint32_t
|
||||
pan_calc_bins_pointer_size(uint32_t width, uint32_t height, uint32_t tile_size,
|
||||
uint32_t hierarchy_mask)
|
||||
{
|
||||
const uint32_t bin_ptr_size = PAN_ARCH >= 12 ? 16 : 8;
|
||||
|
||||
uint32_t bins_x[PAN_BIN_LEVEL_COUNT];
|
||||
uint32_t bins_y[PAN_BIN_LEVEL_COUNT];
|
||||
uint32_t bins[PAN_BIN_LEVEL_COUNT];
|
||||
uint32_t bins_enabled;
|
||||
|
||||
/* On v12+, hierarchy_mask is only used if 4 levels are used at most,
|
||||
* otherwise it selects another mask (0xAC with a tile_size greater than
|
||||
* 32x32, 0xAC with 32x32 and lower) */
|
||||
if ((hierarchy_mask == 0 || util_bitcount(hierarchy_mask) > 4) &&
|
||||
PAN_ARCH >= 12) {
|
||||
if (tile_size > 32 * 32)
|
||||
hierarchy_mask = 0xAC;
|
||||
else
|
||||
hierarchy_mask = 0xAA;
|
||||
}
|
||||
|
||||
bins_x[0] = DIV_ROUND_UP(width, 16);
|
||||
bins_y[0] = DIV_ROUND_UP(height, 16);
|
||||
bins[0] = bins_x[0] * bins_y[0];
|
||||
|
||||
for (uint32_t i = 1; i < ARRAY_SIZE(bins); i++) {
|
||||
bins_x[i] = DIV_ROUND_UP(bins_x[i - 1], 2);
|
||||
bins_y[i] = DIV_ROUND_UP(bins_y[i - 1], 2);
|
||||
bins[i] = bins_x[i] * bins_y[i];
|
||||
}
|
||||
|
||||
bins_enabled = 0;
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(bins); i++) {
|
||||
if ((hierarchy_mask & (1 << i)) != 0)
|
||||
bins_enabled += bins[i];
|
||||
}
|
||||
|
||||
return DIV_ROUND_UP(bins_enabled, 8) * 8 * bin_ptr_size;
|
||||
}
|
||||
|
||||
unsigned
|
||||
GENX(pan_select_tiler_hierarchy_mask)(unsigned width, unsigned height,
|
||||
unsigned max_levels, unsigned tile_size,
|
||||
unsigned mem_budget)
|
||||
{
|
||||
/* On v12+, the hierarchy_mask is deprecated and letting the hardware decide
|
||||
* is prefered. We attempt to use hierarchy_mask of 0 in case the bins can
|
||||
* fit in our memory budget.
|
||||
*/
|
||||
if (PAN_ARCH >= 12 &&
|
||||
pan_calc_bins_pointer_size(width, height, tile_size, 0) <= mem_budget)
|
||||
return 0;
|
||||
|
||||
uint32_t max_fb_wh = MAX2(width, height);
|
||||
uint32_t last_hierarchy_bit = util_last_bit(DIV_ROUND_UP(max_fb_wh, 16));
|
||||
uint32_t hierarchy_mask = BITFIELD_MASK(max_levels);
|
||||
|
||||
/* Always enable the level covering the whole FB, and disable the finest
|
||||
* levels if we don't have enough to cover everything.
|
||||
* This is suboptimal for small primitives, since it might force
|
||||
* primitives to be walked multiple times even if they don't cover the
|
||||
* the tile being processed. On the other hand, it's hard to guess
|
||||
* the draw pattern, so it's probably good enough for now.
|
||||
*/
|
||||
if (last_hierarchy_bit > max_levels)
|
||||
hierarchy_mask <<= last_hierarchy_bit - max_levels;
|
||||
|
||||
/* Disable hierarchies falling under the effective tile size. */
|
||||
uint32_t disable_hierarchies;
|
||||
for (disable_hierarchies = 0;
|
||||
tile_size > (16 * 16) << (disable_hierarchies * 2);
|
||||
disable_hierarchies++)
|
||||
;
|
||||
hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies);
|
||||
|
||||
/* Disable hierachies that would cause the bins to fit in our budget */
|
||||
while (disable_hierarchies < PAN_BIN_LEVEL_COUNT) {
|
||||
uint32_t bins_ptr_size =
|
||||
pan_calc_bins_pointer_size(width, height, tile_size, hierarchy_mask);
|
||||
|
||||
if (bins_ptr_size < mem_budget)
|
||||
break;
|
||||
|
||||
disable_hierarchies++;
|
||||
hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies);
|
||||
}
|
||||
|
||||
/* We should fit in our budget at this point */
|
||||
assert(pan_calc_bins_pointer_size(width, height, tile_size,
|
||||
hierarchy_mask) <= mem_budget);
|
||||
|
||||
/* Before v12, at least one hierarchy level must be enabled. */
|
||||
assert(hierarchy_mask != 0 || PAN_ARCH >= 12);
|
||||
|
||||
return hierarchy_mask;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -198,6 +198,13 @@ unsigned GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
|
|||
const struct pan_tiler_context *tiler_ctx,
|
||||
void *out);
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
/* Select the tiler hierarchy mask for a width x height framebuffer.
 *
 * max_levels bounds the number of levels the mask enables, levels falling
 * under tile_size are disabled, and further levels are dropped (finest
 * first) to bring the bin pointer array size under mem_budget bytes.
 * On v12+, 0 is returned when the bin pointer size computed for a zero
 * mask already fits in mem_budget.
 */
unsigned GENX(pan_select_tiler_hierarchy_mask)(uint32_t width, uint32_t height,
|
||||
uint32_t max_levels,
|
||||
uint32_t tile_size,
|
||||
uint32_t mem_budget);
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH <= 9
|
||||
void GENX(pan_emit_fragment_job_payload)(const struct pan_fb_info *fb,
|
||||
uint64_t fbd, void *out);
|
||||
|
|
|
|||
|
|
@ -79,25 +79,4 @@ panfrost_last_nonnull(uint64_t *ptrs, unsigned count)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Build a hierarchy mask with max_levels consecutive bits set, positioned so
 * that its top bit matches the level derived from the largest framebuffer
 * dimension. */
static inline uint32_t
pan_select_tiler_hierarchy_mask(unsigned width, unsigned height,
                                unsigned max_levels)
{
   const uint32_t largest_dim = MAX2(width, height);
   const uint32_t top_bit = util_last_bit(DIV_ROUND_UP(largest_dim, 16));
   const uint32_t mask = BITFIELD_MASK(max_levels);

   /* Enough levels to cover everything: keep the mask as is. */
   if (top_bit <= max_levels)
      return mask;

   /* Otherwise keep the level covering the whole FB enabled and give up on
    * the finest levels instead.
    * This is suboptimal for small primitives, since it might force
    * primitives to be walked multiple times even if they don't cover the
    * tile being processed. On the other hand, it's hard to guess the draw
    * pattern, so it's probably good enough for now.
    */
   return mask << (top_bit - max_levels);
}
|
||||
|
||||
#endif /* PAN_UTIL_H */
|
||||
|
|
|
|||
|
|
@ -820,8 +820,9 @@ get_tiler_desc(struct panvk_cmd_buffer *cmdbuf)
|
|||
unsigned max_levels = tiler_features.max_levels;
|
||||
assert(max_levels >= 2);
|
||||
|
||||
cfg.hierarchy_mask =
|
||||
panvk_select_tiler_hierarchy_mask(phys_dev, &cmdbuf->state.gfx);
|
||||
/* The tiler chunk starts with a header of 64 bytes */
|
||||
cfg.hierarchy_mask = panvk_select_tiler_hierarchy_mask(
|
||||
phys_dev, &cmdbuf->state.gfx, phys_dev->csf.tiler.chunk_size - 64);
|
||||
cfg.fb_width = fbinfo->width;
|
||||
cfg.fb_height = fbinfo->height;
|
||||
|
||||
|
|
|
|||
|
|
@ -641,6 +641,8 @@ static VkResult
|
|||
init_tiler(struct panvk_queue *queue)
|
||||
{
|
||||
struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
|
||||
const struct panvk_physical_device *phys_dev =
|
||||
to_panvk_physical_device(dev->vk.physical);
|
||||
struct panvk_tiler_heap *tiler_heap = &queue->tiler_heap;
|
||||
VkResult result;
|
||||
|
||||
|
|
@ -659,13 +661,13 @@ init_tiler(struct panvk_queue *queue)
|
|||
goto err_free_desc;
|
||||
}
|
||||
|
||||
tiler_heap->chunk_size = 2 * 1024 * 1024;
|
||||
tiler_heap->chunk_size = phys_dev->csf.tiler.chunk_size;
|
||||
|
||||
struct drm_panthor_tiler_heap_create thc = {
|
||||
.vm_id = pan_kmod_vm_handle(dev->kmod.vm),
|
||||
.chunk_size = tiler_heap->chunk_size,
|
||||
.initial_chunk_count = 5,
|
||||
.max_chunks = 64,
|
||||
.initial_chunk_count = phys_dev->csf.tiler.initial_chunks,
|
||||
.max_chunks = phys_dev->csf.tiler.max_chunks,
|
||||
.target_in_flight = 65535,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -255,8 +255,8 @@ panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
|
|||
}
|
||||
|
||||
pan_pack(&batch->tiler.ctx_templ, TILER_CONTEXT, cfg) {
|
||||
cfg.hierarchy_mask =
|
||||
panvk_select_tiler_hierarchy_mask(phys_dev, &cmdbuf->state.gfx);
|
||||
cfg.hierarchy_mask = panvk_select_tiler_hierarchy_mask(
|
||||
phys_dev, &cmdbuf->state.gfx, pan_kmod_bo_size(dev->tiler_heap->bo));
|
||||
cfg.fb_width = fbinfo->width;
|
||||
cfg.fb_height = fbinfo->height;
|
||||
cfg.heap = batch->tiler.heap_desc.gpu;
|
||||
|
|
|
|||
|
|
@ -200,23 +200,16 @@ struct panvk_cmd_graphics_state {
|
|||
|
||||
static inline uint32_t
|
||||
panvk_select_tiler_hierarchy_mask(const struct panvk_physical_device *phys_dev,
|
||||
const struct panvk_cmd_graphics_state *state)
|
||||
const struct panvk_cmd_graphics_state *state,
|
||||
unsigned bin_ptr_mem_budget)
|
||||
{
|
||||
struct panfrost_tiler_features tiler_features =
|
||||
panfrost_query_tiler_features(&phys_dev->kmod.props);
|
||||
|
||||
uint32_t hierarchy_mask =
|
||||
pan_select_tiler_hierarchy_mask(state->render.fb.info.width,
|
||||
state->render.fb.info.height,
|
||||
tiler_features.max_levels);
|
||||
|
||||
/* Disable hierarchies falling under the effective tile size. */
|
||||
uint32_t disable_hierarchies;
|
||||
for (disable_hierarchies = 0; state->render.fb.info.tile_size >
|
||||
(16 * 16) << (disable_hierarchies * 2);
|
||||
disable_hierarchies++)
|
||||
;
|
||||
hierarchy_mask &= ~BITFIELD_MASK(disable_hierarchies);
|
||||
uint32_t hierarchy_mask = GENX(pan_select_tiler_hierarchy_mask)(
|
||||
state->render.fb.info.width, state->render.fb.info.height,
|
||||
tiler_features.max_levels, state->render.fb.info.tile_size,
|
||||
bin_ptr_mem_budget);
|
||||
|
||||
return hierarchy_mask;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1140,6 +1140,13 @@ panvk_physical_device_init(struct panvk_physical_device *device,
|
|||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
if (arch >= 10) {
|
||||
/* XXX: Make dri options for those */
|
||||
device->csf.tiler.chunk_size = 2 * 1024 * 1024;
|
||||
device->csf.tiler.initial_chunks = 5;
|
||||
device->csf.tiler.max_chunks = 64;
|
||||
}
|
||||
|
||||
if (arch != 10)
|
||||
vk_warn_non_conformant_implementation("panvk");
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,16 @@ struct panvk_physical_device {
|
|||
|
||||
const struct panfrost_model *model;
|
||||
|
||||
union {
|
||||
struct {
|
||||
struct {
|
||||
uint32_t chunk_size;
|
||||
uint32_t initial_chunks;
|
||||
uint32_t max_chunks;
|
||||
} tiler;
|
||||
} csf;
|
||||
};
|
||||
|
||||
struct {
|
||||
dev_t primary_rdev;
|
||||
dev_t render_rdev;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue