pan/v9+: Make texel buffers use BufferDescriptor

Texel buffers are currently described by a TextureDescriptor, which leads
to restrictive limits on size and alignment.
These limits can be avoided by using a BufferDescriptor instead.

This requires first embedding a ConversionDescriptor into some of the
currently empty space of the BufferDescriptor, and modifying the
compiler so that instead of outputting TEX_FETCH, it will:

1. Load the ConversionDescriptor with LD_PKA
2. Get the buffer address with LEA_BUF[_IMM]
3. Use LD_CVT to get the value

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37007>
This commit is contained in:
Ludvig Lindau 2025-08-26 12:04:34 +00:00 committed by Marge Bot
parent af28c453ba
commit 4573110e4e
13 changed files with 88 additions and 90 deletions

View file

@ -78,11 +78,19 @@ struct panfrost_sampler_state {
};
/* Misnomer: Sampler view corresponds to textures, not samplers */
struct mali_buffer_packed;
struct panfrost_sampler_view {
struct pipe_sampler_view base;
struct panfrost_pool_ref state;
struct mali_texture_packed bifrost_descriptor;
#if PAN_ARCH >= 9
union {
struct mali_buffer_packed bifrost_buf_descriptor;
struct mali_texture_packed bifrost_tex_descriptor;
};
#else
/* TODO: move Bifrost over to using BufferDescriptor as well. */
struct mali_texture_packed bifrost_tex_descriptor;
#endif
uint64_t texture_bo;
uint64_t texture_size;
uint64_t modifier;
@ -1001,7 +1009,7 @@ panfrost_emit_images(struct panfrost_batch *batch, mesa_shader_stage stage)
};
panfrost_update_sampler_view(&view, &ctx->base);
out[i] = view.bifrost_descriptor;
out[i] = view.bifrost_tex_descriptor;
panfrost_track_image_access(batch, stage, image);
}
@ -1713,8 +1721,6 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
panfrost_translate_texture_dimension(so->base.target);
if (so->base.target == PIPE_BUFFER) {
const struct util_format_description *desc =
util_format_description(format);
struct pan_buffer_view bview = {
.format = format,
.width_el =
@ -1723,15 +1729,20 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
.base = prsrc->plane.base + so->base.u.buf.offset,
};
#if PAN_ARCH >= 9
void *tex = &so->bifrost_buf_descriptor;
GENX(pan_buffer_texture_emit)(&bview, tex);
return;
#else
const struct util_format_description *desc =
util_format_description(format);
if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
bview.astc.narrow =
so->base.astc_decode_format == PIPE_ASTC_DECODE_FORMAT_UNORM8;
bview.astc.hdr = util_format_is_astc_hdr(format);
}
#if PAN_ARCH >= 9
unsigned payload_size = pan_size(NULL_PLANE);
#elif PAN_ARCH >= 6
#if PAN_ARCH >= 6
unsigned payload_size = pan_size(SURFACE_WITH_STRIDE);
#else
unsigned payload_size = pan_size(TEXTURE) + pan_size(SURFACE_WITH_STRIDE);
@ -1748,7 +1759,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
so->state = panfrost_pool_take_ref(pool, payload.gpu);
void *tex = (PAN_ARCH >= 6) ? &so->bifrost_descriptor : payload.cpu;
void *tex = (PAN_ARCH >= 6) ? &so->bifrost_tex_descriptor : payload.cpu;
if (PAN_ARCH <= 5) {
payload.cpu += pan_size(TEXTURE);
@ -1757,6 +1768,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
GENX(pan_buffer_texture_emit)(&bview, tex, &payload);
return;
#endif
}
unsigned first_level = so->base.u.tex.first_level;
@ -1813,7 +1825,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
so->state = panfrost_pool_take_ref(pool, payload.gpu);
void *tex = (PAN_ARCH >= 6) ? &so->bifrost_descriptor : payload.cpu;
void *tex = (PAN_ARCH >= 6) ? &so->bifrost_tex_descriptor : payload.cpu;
if (PAN_ARCH <= 5) {
payload.cpu += pan_size(TEXTURE);
@ -1913,7 +1925,7 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
struct panfrost_resource *rsrc = pan_resource(pview->texture);
panfrost_update_sampler_view(view, &ctx->base);
out[i] = view->bifrost_descriptor;
out[i] = view->bifrost_tex_descriptor;
panfrost_batch_read_rsrc(batch, rsrc, stage);
panfrost_batch_add_bo(batch, view->state.bo, stage);

View file

@ -225,6 +225,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
/* Lower resource indices */
NIR_PASS(_, s, panfrost_nir_lower_res_indices, &inputs);
pan_shader_lower_texture_late(s, inputs.gpu_id);
if (dev->arch >= 9) {
inputs.valhall.use_ld_var_buf = panfrost_use_ld_var_buf(s);

View file

@ -426,6 +426,7 @@ main(int argc, const char **argv)
pan_shader_preprocess(s, inputs.gpu_id);
pan_shader_lower_texture_early(s, inputs.gpu_id);
pan_shader_postprocess(s, inputs.gpu_id);
pan_shader_lower_texture_late(s, inputs.gpu_id);
NIR_PASS(_, s, nir_opt_deref);
NIR_PASS(_, s, nir_lower_vars_to_ssa);

View file

@ -1687,6 +1687,8 @@ static void
bi_emit_image_load(bi_builder *b, nir_intrinsic_instr *instr)
{
enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) &&
"Texel buffers should already have been lowered");
unsigned coord_comps = nir_image_intrinsic_coord_components(instr);
bool array =
nir_intrinsic_image_array(instr) || dim == GLSL_SAMPLER_DIM_CUBE;
@ -1737,6 +1739,8 @@ static void
bi_emit_lea_image_to(bi_builder *b, bi_index dest, nir_intrinsic_instr *instr)
{
enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) &&
"Texel buffers should already have been lowered");
bool array =
nir_intrinsic_image_array(instr) || dim == GLSL_SAMPLER_DIM_CUBE;
unsigned coord_comps = nir_image_intrinsic_coord_components(instr);
@ -4680,6 +4684,8 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
break;
case nir_texop_txf:
case nir_texop_txf_ms: {
assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF &&
"Texel buffers should already have been lowered");
/* On Valhall, TEX_FETCH doesn't have CUBE support. This is not a problem
* as a cube is just a 2D array in any cases. */
if (dim == BI_DIMENSION_CUBE)

View file

@ -357,59 +357,6 @@ translate_superblock_size(uint64_t modifier)
(cfg__).slice_stride = size__
#endif
static void
pan_emit_bview_plane(const struct pan_buffer_view *bview, void *payload)
{
const struct util_format_description *desc =
util_format_description(bview->format);
uint64_t size =
(uint64_t)util_format_get_blocksize(bview->format) * bview->width_el;
if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
bool srgb = (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
/* sRGB formats decode to RGBA8 sRGB, which is narrow.
*
* Non-sRGB formats decode to RGBA16F which is wide except if decode
* precision is set to GL_RGBA8 for that texture.
*/
bool wide = !srgb && !bview->astc.narrow;
if (desc->block.depth > 1) {
pan_cast_and_pack(payload, ASTC_3D_PLANE, cfg) {
cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR;
cfg.decode_hdr = bview->astc.hdr;
cfg.decode_wide = wide;
cfg.block_width = pan_astc_dim_3d(desc->block.width);
cfg.block_height = pan_astc_dim_3d(desc->block.height);
cfg.block_depth = pan_astc_dim_3d(desc->block.depth);
cfg.pointer = bview->base;
PLANE_SET_SIZE(cfg, size);
PLANE_SET_EXTENT(cfg, bview->width_el, 1);
}
} else {
pan_cast_and_pack(payload, ASTC_2D_PLANE, cfg) {
cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR;
cfg.decode_hdr = bview->astc.hdr;
cfg.decode_wide = wide;
cfg.block_width = pan_astc_dim_2d(desc->block.width);
cfg.block_height = pan_astc_dim_2d(desc->block.height);
PLANE_SET_SIZE(cfg, size);
cfg.pointer = bview->base;
PLANE_SET_EXTENT(cfg, bview->width_el, 1);
}
}
} else {
pan_cast_and_pack(payload, GENERIC_PLANE, cfg) {
cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR;
cfg.clump_format = pan_clump_format(bview->format);
PLANE_SET_SIZE(cfg, size);
cfg.pointer = bview->base;
cfg.clump_ordering = MALI_CLUMP_ORDERING_LINEAR;
PLANE_SET_EXTENT(cfg, bview->width_el, 1);
}
}
}
static void
get_linear_or_u_tiled_plane_props(const struct pan_image_view *iview,
int plane_idx, unsigned mip_level,
@ -1381,6 +1328,30 @@ GENX(pan_storage_texture_emit)(const struct pan_image_view *iview,
}
#endif
#if PAN_ARCH >= 9
void
GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
struct mali_buffer_packed *out)
{
unsigned stride = util_format_get_blocksize(bview->format);
struct MALI_INTERNAL_CONVERSION conv = {
.memory_format = GENX(pan_format_from_pipe_format)(bview->format)->hw,
.raw = false,
};
pan_pack(out, BUFFER, cfg) {
cfg.type = MALI_DESCRIPTOR_TYPE_BUFFER;
cfg.buffer_type = MALI_BUFFER_TYPE_STRUCTURE;
cfg.size = bview->width_el * stride;
cfg.address = bview->base;
cfg.stride = stride;
cfg.conversion = conv;
}
}
#else
void
GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
struct mali_texture_packed *out,
@ -1394,11 +1365,7 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
PIPE_SWIZZLE_W,
};
#if PAN_ARCH >= 9
pan_emit_bview_plane(bview, payload->cpu);
#else
pan_emit_bview_surface_with_stride(bview, payload->cpu);
#endif
pan_pack(out, TEXTURE, cfg) {
cfg.dimension = MALI_TEXTURE_DIMENSION_1D;
@ -1407,11 +1374,7 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
cfg.height = 1;
cfg.sample_count = 1;
cfg.swizzle = pan_translate_swizzle_4(rgba_swizzle);
#if PAN_ARCH >= 9
cfg.texel_interleave = false;
#else
cfg.texel_ordering = MALI_TEXTURE_LAYOUT_LINEAR;
#endif
cfg.levels = 1;
cfg.array_size = 1;
@ -1422,3 +1385,5 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
#endif
}
}
#endif

View file

@ -3,6 +3,7 @@
* Copyright (C) 2014 Broadcom
* Copyright (C) 2018-2019 Alyssa Rosenzweig
* Copyright (C) 2019-2020 Collabora, Ltd.
* Copyright (C) 2025 Arm Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -36,6 +37,7 @@
struct pan_ptr;
struct mali_texture_packed;
struct mali_buffer_packed;
struct pan_buffer_view;
#if PAN_ARCH >= 7
@ -91,9 +93,13 @@ void GENX(pan_tex_emit_afrc_payload_entry)(
unsigned layer_or_z_slice, unsigned sample, void **payload);
#endif
void
GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
struct mali_texture_packed *out,
const struct pan_ptr *payload);
#if PAN_ARCH >= 9
void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
struct mali_buffer_packed *out);
#else
void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
struct mali_texture_packed *out,
const struct pan_ptr *payload);
#endif
#endif

View file

@ -23,10 +23,12 @@ struct panvk_buffer_view {
struct panvk_priv_mem mem;
struct {
#if PAN_ARCH >= 9
struct mali_buffer_packed buf;
#else
/* TODO: move Bifrost over to using BufferDescriptor as well. */
struct mali_texture_packed tex;
#if PAN_ARCH < 9
/* Valhall passes a texture descriptor to the LEA_TEX instruction. */
struct mali_attribute_buffer_packed img_attrib_buf[2];
#endif
} descs;

View file

@ -768,12 +768,9 @@ get_buffer_format_features(struct panvk_physical_device *physical_device,
if ((fmt.bind & PAN_BIND_VERTEX_BUFFER) && !util_format_is_srgb(pfmt))
features |= VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT;
if ((fmt.bind & PAN_BIND_SAMPLER_VIEW) &&
!util_format_is_depth_or_stencil(pfmt))
features |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT;
if (fmt.bind & PAN_BIND_STORAGE_IMAGE)
features |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT |
if (fmt.bind & PAN_BIND_TEXEL_BUFFER)
features |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT |
VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT |
VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT |
VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT;

View file

@ -48,7 +48,6 @@ panvk_per_arch(CreateBufferView)(VkDevice _device,
VkBufferUsageFlags tex_usage_mask = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
#if PAN_ARCH >= 9
/* Valhall passes a texture descriptor to LEA_TEX. */
tex_usage_mask |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
#endif
@ -63,18 +62,17 @@ panvk_per_arch(CreateBufferView)(VkDevice _device,
};
#if PAN_ARCH >= 9
view->mem = panvk_pool_alloc_desc(&device->mempools.rw, NULL_PLANE);
view->mem = panvk_pool_alloc_desc(&device->mempools.rw, BUFFER);
GENX(pan_buffer_texture_emit)(&bview, &view->descs.buf);
#else
view->mem =
panvk_pool_alloc_desc(&device->mempools.rw, SURFACE_WITH_STRIDE);
#endif
struct pan_ptr ptr = {
.gpu = panvk_priv_mem_dev_addr(view->mem),
.cpu = panvk_priv_mem_host_addr(view->mem),
};
GENX(pan_buffer_texture_emit)(&bview, &view->descs.tex, &ptr);
#endif
}
#if PAN_ARCH < 9

View file

@ -156,6 +156,7 @@ get_preload_shader(struct panvk_device *dev,
pan_shader_preprocess(nir, inputs.gpu_id);
pan_shader_lower_texture_early(nir, inputs.gpu_id);
pan_shader_postprocess(nir, inputs.gpu_id);
pan_shader_lower_texture_late(nir, inputs.gpu_id);
VkResult result = panvk_per_arch(create_internal_shader)(
dev, nir, &inputs, &shader);

View file

@ -235,7 +235,7 @@ write_buffer_view_desc(struct panvk_descriptor_set *set,
else
write_desc(set, binding, elem, &view->descs.tex, NO_SUBDESC);
#else
write_desc(set, binding, elem, &view->descs.tex, NO_SUBDESC);
write_desc(set, binding, elem, &view->descs.buf, NO_SUBDESC);
#endif
}

View file

@ -585,12 +585,20 @@ load_tex_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim,
{
nir_def *loaded_size;
if (dim == GLSL_SAMPLER_DIM_BUF) {
#if PAN_ARCH >= 9
nir_def *bytes = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 4, 1, 32, ctx);
nir_def *stride = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16, 1, 32, ctx);
loaded_size = nir_idiv(b, nir_u2u32(b, bytes), nir_u2u32(b, stride));
#else
nir_def *tex_w = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 1, 16, ctx);
/* S dimension is 16 bits wide. We don't support combining S,T dimensions
* to allow large buffers yet. */
loaded_size = nir_iadd_imm(b, nir_u2u32(b, tex_w), 1);
#endif
} else {
nir_def *tex_w_h = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 2, 16, ctx);

View file

@ -920,6 +920,7 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
glsl_type_size, nir_lower_io_use_interpolated_input_intrinsics);
pan_shader_postprocess(nir, compile_input->gpu_id);
pan_shader_lower_texture_late(nir, compile_input->gpu_id);
if (stage == MESA_SHADER_VERTEX)
NIR_PASS(_, nir, nir_shader_intrinsics_pass, panvk_lower_load_vs_input,