From 4573110e4e3bd950bfe93ba9eb2c88ad2b48c388 Mon Sep 17 00:00:00 2001 From: Ludvig Lindau Date: Tue, 26 Aug 2025 12:04:34 +0000 Subject: [PATCH] pan/v9+: Make texel buffers use BufferDescriptor Texel buffers are currently described by a TextureDescriptor, which leads to restrictive limits on size and alignment. These limits can be avoided by using a BufferDescriptor instead. This requires first embedding a ConversionDescriptor into some of the currently empty space of the BufferDescriptor, and modifying the compiler so that instead of outputting TEX_FETCH, it will: 1. Load the ConversionDescriptor with LD_PKA 2. Get the buffer address with LEA_BUF[_IMM] 3. Use LD_CVT to get the value Reviewed-by: Erik Faye-Lund Reviewed-by: Boris Brezillon Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 34 +++++--- src/gallium/drivers/panfrost/pan_shader.c | 1 + src/panfrost/clc/pan_compile.c | 1 + src/panfrost/compiler/bifrost_compile.c | 6 ++ src/panfrost/lib/pan_texture.c | 87 ++++++------------- src/panfrost/lib/pan_texture.h | 14 ++- src/panfrost/vulkan/panvk_buffer_view.h | 6 +- src/panfrost/vulkan/panvk_physical_device.c | 9 +- src/panfrost/vulkan/panvk_vX_buffer_view.c | 8 +- src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c | 1 + src/panfrost/vulkan/panvk_vX_descriptor_set.c | 2 +- .../vulkan/panvk_vX_nir_lower_descriptors.c | 8 ++ src/panfrost/vulkan/panvk_vX_shader.c | 1 + 13 files changed, 88 insertions(+), 90 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 96383eb0117..a57387dbf71 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -78,11 +78,19 @@ struct panfrost_sampler_state { }; /* Misnomer: Sampler view corresponds to textures, not samplers */ - +struct mali_buffer_packed; struct panfrost_sampler_view { struct pipe_sampler_view base; struct panfrost_pool_ref state; - struct mali_texture_packed bifrost_descriptor; +#if 
PAN_ARCH >= 9 + union { + struct mali_buffer_packed bifrost_buf_descriptor; + struct mali_texture_packed bifrost_tex_descriptor; + }; +#else + /* TODO: move Bifrost over to using BufferDescriptor as well. */ + struct mali_texture_packed bifrost_tex_descriptor; +#endif uint64_t texture_bo; uint64_t texture_size; uint64_t modifier; @@ -1001,7 +1009,7 @@ panfrost_emit_images(struct panfrost_batch *batch, mesa_shader_stage stage) }; panfrost_update_sampler_view(&view, &ctx->base); - out[i] = view.bifrost_descriptor; + out[i] = view.bifrost_tex_descriptor; panfrost_track_image_access(batch, stage, image); } @@ -1713,8 +1721,6 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, panfrost_translate_texture_dimension(so->base.target); if (so->base.target == PIPE_BUFFER) { - const struct util_format_description *desc = - util_format_description(format); struct pan_buffer_view bview = { .format = format, .width_el = @@ -1723,15 +1729,20 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, .base = prsrc->plane.base + so->base.u.buf.offset, }; +#if PAN_ARCH >= 9 + void *tex = &so->bifrost_buf_descriptor; + GENX(pan_buffer_texture_emit)(&bview, tex); + return; +#else + const struct util_format_description *desc = + util_format_description(format); if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { bview.astc.narrow = so->base.astc_decode_format == PIPE_ASTC_DECODE_FORMAT_UNORM8; bview.astc.hdr = util_format_is_astc_hdr(format); } -#if PAN_ARCH >= 9 - unsigned payload_size = pan_size(NULL_PLANE); -#elif PAN_ARCH >= 6 +#if PAN_ARCH >= 6 unsigned payload_size = pan_size(SURFACE_WITH_STRIDE); #else unsigned payload_size = pan_size(TEXTURE) + pan_size(SURFACE_WITH_STRIDE); @@ -1748,7 +1759,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, so->state = panfrost_pool_take_ref(pool, payload.gpu); - void *tex = (PAN_ARCH >= 6) ? &so->bifrost_descriptor : payload.cpu; + void *tex = (PAN_ARCH >= 6) ? 
&so->bifrost_tex_descriptor : payload.cpu; if (PAN_ARCH <= 5) { payload.cpu += pan_size(TEXTURE); @@ -1757,6 +1768,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, GENX(pan_buffer_texture_emit)(&bview, tex, &payload); return; +#endif } unsigned first_level = so->base.u.tex.first_level; @@ -1813,7 +1825,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, so->state = panfrost_pool_take_ref(pool, payload.gpu); - void *tex = (PAN_ARCH >= 6) ? &so->bifrost_descriptor : payload.cpu; + void *tex = (PAN_ARCH >= 6) ? &so->bifrost_tex_descriptor : payload.cpu; if (PAN_ARCH <= 5) { payload.cpu += pan_size(TEXTURE); @@ -1913,7 +1925,7 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch, struct panfrost_resource *rsrc = pan_resource(pview->texture); panfrost_update_sampler_view(view, &ctx->base); - out[i] = view->bifrost_descriptor; + out[i] = view->bifrost_tex_descriptor; panfrost_batch_read_rsrc(batch, rsrc, stage); panfrost_batch_add_bo(batch, view->state.bo, stage); diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 34421c6c786..0c05e15ef47 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -225,6 +225,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir, /* Lower resource indices */ NIR_PASS(_, s, panfrost_nir_lower_res_indices, &inputs); + pan_shader_lower_texture_late(s, inputs.gpu_id); if (dev->arch >= 9) { inputs.valhall.use_ld_var_buf = panfrost_use_ld_var_buf(s); diff --git a/src/panfrost/clc/pan_compile.c b/src/panfrost/clc/pan_compile.c index 76f4be52de1..3488fe094d3 100644 --- a/src/panfrost/clc/pan_compile.c +++ b/src/panfrost/clc/pan_compile.c @@ -426,6 +426,7 @@ main(int argc, const char **argv) pan_shader_preprocess(s, inputs.gpu_id); pan_shader_lower_texture_early(s, inputs.gpu_id); pan_shader_postprocess(s, inputs.gpu_id); + pan_shader_lower_texture_late(s, 
inputs.gpu_id); NIR_PASS(_, s, nir_opt_deref); NIR_PASS(_, s, nir_lower_vars_to_ssa); diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 21e02780b1b..b5785665a90 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -1687,6 +1687,8 @@ static void bi_emit_image_load(bi_builder *b, nir_intrinsic_instr *instr) { enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); + assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) && + "Texel buffers should already have been lowered"); unsigned coord_comps = nir_image_intrinsic_coord_components(instr); bool array = nir_intrinsic_image_array(instr) || dim == GLSL_SAMPLER_DIM_CUBE; @@ -1737,6 +1739,8 @@ static void bi_emit_lea_image_to(bi_builder *b, bi_index dest, nir_intrinsic_instr *instr) { enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); + assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) && + "Texel buffers should already have been lowered"); bool array = nir_intrinsic_image_array(instr) || dim == GLSL_SAMPLER_DIM_CUBE; unsigned coord_comps = nir_image_intrinsic_coord_components(instr); @@ -4680,6 +4684,8 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr) break; case nir_texop_txf: case nir_texop_txf_ms: { + assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF && + "Texel buffers should already have been lowered"); /* On Valhall, TEX_FETCH doesn't have CUBE support. This is not a problem * as a cube is just a 2D array in any cases. 
*/ if (dim == BI_DIMENSION_CUBE) diff --git a/src/panfrost/lib/pan_texture.c b/src/panfrost/lib/pan_texture.c index aa464f070c3..a3a5f850f58 100644 --- a/src/panfrost/lib/pan_texture.c +++ b/src/panfrost/lib/pan_texture.c @@ -357,59 +357,6 @@ translate_superblock_size(uint64_t modifier) (cfg__).slice_stride = size__ #endif -static void -pan_emit_bview_plane(const struct pan_buffer_view *bview, void *payload) -{ - const struct util_format_description *desc = - util_format_description(bview->format); - uint64_t size = - (uint64_t)util_format_get_blocksize(bview->format) * bview->width_el; - - if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { - bool srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); - /* sRGB formats decode to RGBA8 sRGB, which is narrow. - * - * Non-sRGB formats decode to RGBA16F which is wide except if decode - * precision is set to GL_RGBA8 for that texture. - */ - bool wide = !srgb && !bview->astc.narrow; - - if (desc->block.depth > 1) { - pan_cast_and_pack(payload, ASTC_3D_PLANE, cfg) { - cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR; - cfg.decode_hdr = bview->astc.hdr; - cfg.decode_wide = wide; - cfg.block_width = pan_astc_dim_3d(desc->block.width); - cfg.block_height = pan_astc_dim_3d(desc->block.height); - cfg.block_depth = pan_astc_dim_3d(desc->block.depth); - cfg.pointer = bview->base; - PLANE_SET_SIZE(cfg, size); - PLANE_SET_EXTENT(cfg, bview->width_el, 1); - } - } else { - pan_cast_and_pack(payload, ASTC_2D_PLANE, cfg) { - cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR; - cfg.decode_hdr = bview->astc.hdr; - cfg.decode_wide = wide; - cfg.block_width = pan_astc_dim_2d(desc->block.width); - cfg.block_height = pan_astc_dim_2d(desc->block.height); - PLANE_SET_SIZE(cfg, size); - cfg.pointer = bview->base; - PLANE_SET_EXTENT(cfg, bview->width_el, 1); - } - } - } else { - pan_cast_and_pack(payload, GENERIC_PLANE, cfg) { - cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR; - cfg.clump_format = pan_clump_format(bview->format); - 
PLANE_SET_SIZE(cfg, size); - cfg.pointer = bview->base; - cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR; - PLANE_SET_EXTENT(cfg, bview->width_el, 1); - } - } -} - static void get_linear_or_u_tiled_plane_props(const struct pan_image_view *iview, int plane_idx, unsigned mip_level, @@ -1381,6 +1328,30 @@ GENX(pan_storage_texture_emit)(const struct pan_image_view *iview, } #endif +#if PAN_ARCH >= 9 + +void +GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, + struct mali_buffer_packed *out) +{ + unsigned stride = util_format_get_blocksize(bview->format); + struct MALI_INTERNAL_CONVERSION conv = { + .memory_format = GENX(pan_format_from_pipe_format)(bview->format)->hw, + .raw = false, + }; + + pan_pack(out, BUFFER, cfg) { + cfg.type = MALI_DESCRIPTOR_TYPE_BUFFER; + cfg.buffer_type = MALI_BUFFER_TYPE_STRUCTURE; + cfg.size = bview->width_el * stride; + cfg.address = bview->base; + cfg.stride = stride; + cfg.conversion = conv; + } +} + +#else + void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, struct mali_texture_packed *out, @@ -1394,11 +1365,7 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, PIPE_SWIZZLE_W, }; -#if PAN_ARCH >= 9 - pan_emit_bview_plane(bview, payload->cpu); -#else pan_emit_bview_surface_with_stride(bview, payload->cpu); -#endif pan_pack(out, TEXTURE, cfg) { cfg.dimension = MALI_TEXTURE_DIMENSION_1D; @@ -1407,11 +1374,7 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, cfg.height = 1; cfg.sample_count = 1; cfg.swizzle = pan_translate_swizzle_4(rgba_swizzle); -#if PAN_ARCH >= 9 - cfg.texel_interleave = false; -#else cfg.texel_ordering = MALI_TEXTURE_LAYOUT_LINEAR; -#endif cfg.levels = 1; cfg.array_size = 1; @@ -1422,3 +1385,5 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, #endif } } + +#endif diff --git a/src/panfrost/lib/pan_texture.h b/src/panfrost/lib/pan_texture.h index 0b1624471c6..6c0fb6291b6 100644 --- a/src/panfrost/lib/pan_texture.h +++ 
b/src/panfrost/lib/pan_texture.h @@ -3,6 +3,7 @@ * Copyright (C) 2014 Broadcom * Copyright (C) 2018-2019 Alyssa Rosenzweig * Copyright (C) 2019-2020 Collabora, Ltd. + * Copyright (C) 2025 Arm Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,6 +37,7 @@ struct pan_ptr; struct mali_texture_packed; +struct mali_buffer_packed; struct pan_buffer_view; #if PAN_ARCH >= 7 @@ -91,9 +93,13 @@ void GENX(pan_tex_emit_afrc_payload_entry)( unsigned layer_or_z_slice, unsigned sample, void **payload); #endif -void -GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, - struct mali_texture_packed *out, - const struct pan_ptr *payload); +#if PAN_ARCH >= 9 +void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, + struct mali_buffer_packed *out); +#else +void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, + struct mali_texture_packed *out, + const struct pan_ptr *payload); +#endif #endif diff --git a/src/panfrost/vulkan/panvk_buffer_view.h b/src/panfrost/vulkan/panvk_buffer_view.h index fd07895d5af..0ee85a9ea55 100644 --- a/src/panfrost/vulkan/panvk_buffer_view.h +++ b/src/panfrost/vulkan/panvk_buffer_view.h @@ -23,10 +23,12 @@ struct panvk_buffer_view { struct panvk_priv_mem mem; struct { +#if PAN_ARCH >= 9 + struct mali_buffer_packed buf; +#else + /* TODO: move Bifrost over to using BufferDescriptor as well. */ struct mali_texture_packed tex; -#if PAN_ARCH < 9 - /* Valhall passes a texture descriptor to the LEA_TEX instruction. 
*/ struct mali_attribute_buffer_packed img_attrib_buf[2]; #endif } descs; diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c index 7745d6d3ef7..43782e8ac56 100644 --- a/src/panfrost/vulkan/panvk_physical_device.c +++ b/src/panfrost/vulkan/panvk_physical_device.c @@ -768,12 +768,9 @@ get_buffer_format_features(struct panvk_physical_device *physical_device, if ((fmt.bind & PAN_BIND_VERTEX_BUFFER) && !util_format_is_srgb(pfmt)) features |= VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT; - if ((fmt.bind & PAN_BIND_SAMPLER_VIEW) && - !util_format_is_depth_or_stencil(pfmt)) - features |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT; - - if (fmt.bind & PAN_BIND_STORAGE_IMAGE) - features |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT | + if (fmt.bind & PAN_BIND_TEXEL_BUFFER) + features |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT | + VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT | VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT; diff --git a/src/panfrost/vulkan/panvk_vX_buffer_view.c b/src/panfrost/vulkan/panvk_vX_buffer_view.c index 570c2681806..68df690e126 100644 --- a/src/panfrost/vulkan/panvk_vX_buffer_view.c +++ b/src/panfrost/vulkan/panvk_vX_buffer_view.c @@ -48,7 +48,6 @@ panvk_per_arch(CreateBufferView)(VkDevice _device, VkBufferUsageFlags tex_usage_mask = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; #if PAN_ARCH >= 9 - /* Valhall passes a texture descriptor to LEA_TEX. 
*/ tex_usage_mask |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; #endif @@ -63,18 +62,17 @@ panvk_per_arch(CreateBufferView)(VkDevice _device, }; #if PAN_ARCH >= 9 - view->mem = panvk_pool_alloc_desc(&device->mempools.rw, NULL_PLANE); + view->mem = panvk_pool_alloc_desc(&device->mempools.rw, BUFFER); + GENX(pan_buffer_texture_emit)(&bview, &view->descs.buf); #else view->mem = panvk_pool_alloc_desc(&device->mempools.rw, SURFACE_WITH_STRIDE); -#endif - struct pan_ptr ptr = { .gpu = panvk_priv_mem_dev_addr(view->mem), .cpu = panvk_priv_mem_host_addr(view->mem), }; - GENX(pan_buffer_texture_emit)(&bview, &view->descs.tex, &ptr); +#endif } #if PAN_ARCH < 9 diff --git a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c index b8964bd141a..7f004f0c4b0 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c @@ -156,6 +156,7 @@ get_preload_shader(struct panvk_device *dev, pan_shader_preprocess(nir, inputs.gpu_id); pan_shader_lower_texture_early(nir, inputs.gpu_id); pan_shader_postprocess(nir, inputs.gpu_id); + pan_shader_lower_texture_late(nir, inputs.gpu_id); VkResult result = panvk_per_arch(create_internal_shader)( dev, nir, &inputs, &shader); diff --git a/src/panfrost/vulkan/panvk_vX_descriptor_set.c b/src/panfrost/vulkan/panvk_vX_descriptor_set.c index 81a598576ad..81e4ea8fb68 100644 --- a/src/panfrost/vulkan/panvk_vX_descriptor_set.c +++ b/src/panfrost/vulkan/panvk_vX_descriptor_set.c @@ -235,7 +235,7 @@ write_buffer_view_desc(struct panvk_descriptor_set *set, else write_desc(set, binding, elem, &view->descs.tex, NO_SUBDESC); #else - write_desc(set, binding, elem, &view->descs.tex, NO_SUBDESC); + write_desc(set, binding, elem, &view->descs.buf, NO_SUBDESC); #endif } diff --git a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c index 48da15f3041..2b62633f02c 100644 --- 
a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c +++ b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c @@ -585,12 +585,20 @@ load_tex_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim, { nir_def *loaded_size; if (dim == GLSL_SAMPLER_DIM_BUF) { +#if PAN_ARCH >= 9 + nir_def *bytes = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 4, 1, 32, ctx); + nir_def *stride = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16, 1, 32, ctx); + loaded_size = nir_idiv(b, nir_u2u32(b, bytes), nir_u2u32(b, stride)); +#else nir_def *tex_w = load_resource_deref_desc( b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 1, 16, ctx); /* S dimension is 16 bits wide. We don't support combining S,T dimensions * to allow large buffers yet. */ loaded_size = nir_iadd_imm(b, nir_u2u32(b, tex_w), 1); +#endif } else { nir_def *tex_w_h = load_resource_deref_desc( b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 2, 16, ctx); diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 8128f80dc8e..386d799a9de 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -920,6 +920,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir, glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics); pan_shader_postprocess(nir, compile_input->gpu_id); + pan_shader_lower_texture_late(nir, compile_input->gpu_id); if (stage == MESA_SHADER_VERTEX) NIR_PASS(_, nir, nir_shader_intrinsics_pass, panvk_lower_load_vs_input,