From a21ee564e2051e5e6827ce333da1f85b8b828e1d Mon Sep 17 00:00:00 2001 From: Lars-Ivar Hesselberg Simonsen Date: Tue, 11 Nov 2025 12:35:49 +0100 Subject: [PATCH] pan/bi: Make texel buffers use Attribute Buffers Texel buffers are currently described by a TextureDescriptor, which leads to restrictive limits on size and alignment. These limits can be avoided by using AttributeDescriptors + AttributeBufferDescriptors instead. This requires us to access texel buffers using attributes rather than textures, which involves setting up AttributeDescriptors and AttributeBufferDescriptors in their respective allocations, rather than the previous TextureDescriptors in the texture allocation. This is already done for images, so we simply place the texel buffer attributes after the images and ensure the indexing is offset correctly. Accessing a texel buffer thus becomes: 1. Get the buffer address and ConversionDescriptor with LEA_ATTR[_IMM] 2. Use LD_CVT to get the value Reviewed-by: Boris Brezillon Part-of: --- src/gallium/drivers/panfrost/pan_cmdstream.c | 207 ++++++++++++++---- src/gallium/drivers/panfrost/pan_context.c | 14 +- src/gallium/drivers/panfrost/pan_context.h | 3 + src/gallium/drivers/panfrost/pan_shader.c | 19 +- .../compiler/bifrost/bifrost_compile.c | 14 +- src/panfrost/compiler/pan_compiler.c | 7 +- src/panfrost/compiler/pan_compiler.h | 1 + src/panfrost/lib/pan_desc.h | 1 + src/panfrost/lib/pan_texture.c | 30 ++- src/panfrost/lib/pan_texture.h | 4 + src/panfrost/vulkan/panvk_buffer_view.h | 10 +- src/panfrost/vulkan/panvk_vX_buffer_view.c | 77 ++----- src/panfrost/vulkan/panvk_vX_descriptor_set.c | 14 +- .../vulkan/panvk_vX_nir_lower_descriptors.c | 22 +- src/panfrost/vulkan/panvk_vX_shader.c | 6 +- 15 files changed, 281 insertions(+), 148 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 795ce1401d2..9e14641ce94 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ 
b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -88,8 +88,10 @@ struct panfrost_sampler_view { struct mali_texture_packed bifrost_tex_descriptor; }; #else - /* TODO: move Bifrost over to using BufferDescriptor as well. */ - struct mali_texture_packed bifrost_tex_descriptor; + union { + uint64_t texel_buffer_base_ptr; + struct mali_texture_packed bifrost_tex_descriptor; + }; #endif uint64_t texture_bo; uint64_t texture_size; @@ -1709,6 +1711,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, panfrost_translate_texture_dimension(so->base.target); if (so->base.target == PIPE_BUFFER) { +#if PAN_ARCH >= 9 struct pan_buffer_view bview = { .format = format, .width_el = @@ -1717,11 +1720,21 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, .base = prsrc->plane.base + so->base.u.buf.offset, }; -#if PAN_ARCH >= 9 void *tex = &so->bifrost_buf_descriptor; GENX(pan_buffer_texture_emit)(&bview, tex); - return; +#elif PAN_ARCH >= 6 + /* For Bifrost, we'll generate the Attribute Buffer Descriptor when + * setting up attributes, so only store the base pointer at this point. 
*/ + so->texel_buffer_base_ptr = prsrc->plane.base; #else + struct pan_buffer_view bview = { + .format = format, + .width_el = + MIN2(so->base.u.buf.size / util_format_get_blocksize(format), + pan_get_max_texel_buffer_elements(PAN_ARCH)), + .base = prsrc->plane.base + so->base.u.buf.offset, + }; + const struct util_format_description *desc = util_format_description(format); if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { @@ -1730,11 +1743,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, bview.astc.hdr = util_format_is_astc_hdr(format); } -#if PAN_ARCH >= 6 - unsigned payload_size = pan_size(SURFACE_WITH_STRIDE); -#else unsigned payload_size = pan_size(TEXTURE) + pan_size(SURFACE_WITH_STRIDE); -#endif struct panfrost_pool *pool = so->pool ?: &ctx->descs; struct pan_ptr payload = @@ -1747,16 +1756,13 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, so->state = panfrost_pool_take_ref(pool, payload.gpu); - void *tex = (PAN_ARCH >= 6) ? &so->bifrost_tex_descriptor : payload.cpu; - - if (PAN_ARCH <= 5) { - payload.cpu += pan_size(TEXTURE); - payload.gpu += pan_size(TEXTURE); - } + void *tex = payload.cpu; + payload.cpu += pan_size(TEXTURE); + payload.gpu += pan_size(TEXTURE); GENX(pan_buffer_texture_emit)(&bview, tex, &payload); - return; #endif + return; } unsigned first_level = so->base.u.tex.first_level; @@ -1913,7 +1919,16 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch, struct panfrost_resource *rsrc = pan_resource(pview->texture); panfrost_update_sampler_view(view, &ctx->base); + +#if PAN_ARCH < 9 + if (pview->target == PIPE_BUFFER) + /* Texel buffers will be emitted as attributes */ + panfrost_emit_null_texture(&out[i]); + else + out[i] = view->bifrost_tex_descriptor; +#else out[i] = view->bifrost_tex_descriptor; +#endif panfrost_batch_read_rsrc(batch, rsrc, stage); panfrost_batch_add_bo(batch, view->state.bo, stage); @@ -1991,10 +2006,10 @@ panfrost_emit_sampler_descriptors(struct panfrost_batch *batch, 
*/ static void emit_image_attribs(struct panfrost_context *ctx, mesa_shader_stage shader, - struct mali_attribute_packed *attribs, + struct mali_attribute_packed *attribs, uint64_t image_mask, unsigned first_image_buf_index) { - unsigned last_bit = util_last_bit(ctx->image_mask[shader]); + unsigned last_bit = util_last_bit(image_mask); for (unsigned i = 0; i < last_bit; ++i) { enum pipe_format format = ctx->images[shader][i].format; @@ -2008,6 +2023,27 @@ emit_image_attribs(struct panfrost_context *ctx, mesa_shader_stage shader, } } +#if PAN_ARCH >= 6 +static void +emit_texbuf_attribs(struct panfrost_context *ctx, mesa_shader_stage shader, + struct mali_attribute_packed *attribs, + unsigned first_texel_buf_index) +{ + unsigned i; + BITSET_FOREACH_SET(i, ctx->texture_buffer[shader].mask, + PIPE_MAX_SHADER_SAMPLER_VIEWS) { + struct panfrost_sampler_view *view = ctx->sampler_views[shader][i]; + assert(view); + enum pipe_format format = view->base.format; + pan_pack(attribs + i, ATTRIBUTE, cfg) { + cfg.buffer_index = first_texel_buf_index + i; + cfg.offset_enable = false; + cfg.format = GENX(pan_format_from_pipe_format)(format)->hw; + } + } +} +#endif + static enum mali_attribute_type pan_modifier_to_attr_type(uint64_t modifier) { @@ -2023,10 +2059,10 @@ pan_modifier_to_attr_type(uint64_t modifier) static void emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, - struct mali_attribute_buffer_packed *bufs) + struct mali_attribute_buffer_packed *bufs, uint64_t image_mask) { struct panfrost_context *ctx = batch->ctx; - unsigned last_bit = util_last_bit(ctx->image_mask[shader]); + unsigned last_bit = util_last_bit(image_mask); for (unsigned i = 0; i < last_bit; ++i) { struct pipe_image_view *image = &ctx->images[shader][i]; @@ -2065,6 +2101,18 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, panfrost_track_image_access(batch, shader, image); +#if MALI_ARCH >= 6 + if (is_buffer) { + pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, 
cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.pointer = rsrc->plane.base + offset; + cfg.stride = util_format_get_blocksize(image->format); + cfg.size = rsrc->base.width0; + } + continue; + } +#endif + pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) { cfg.type = pan_modifier_to_attr_type(rsrc->image.props.modifier); cfg.pointer = rsrc->plane.base + offset; @@ -2074,6 +2122,7 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, is_buffer ? 0 : image->u.tex.level); } +#if MALI_ARCH <= 5 if (is_buffer) { pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { @@ -2084,6 +2133,7 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, continue; } +#endif pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { @@ -2122,9 +2172,31 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, } } +#if PAN_ARCH >= 6 +static void +emit_texbuf_bufs(struct panfrost_context *ctx, mesa_shader_stage shader, + struct mali_attribute_buffer_packed *bufs) +{ + unsigned i; + BITSET_FOREACH_SET(i, ctx->texture_buffer[shader].mask, + PIPE_MAX_SHADER_SAMPLER_VIEWS) { + struct panfrost_sampler_view *view = ctx->sampler_views[shader][i]; + assert(view); + struct pipe_sampler_view *pview = &view->base; + + pan_pack(bufs + i, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.pointer = view->texel_buffer_base_ptr + pview->u.buf.offset; + cfg.stride = util_format_get_blocksize(pview->format); + cfg.size = pview->u.buf.size; + } + } +} +#endif + static uint64_t -panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers, - mesa_shader_stage type) +panfrost_emit_image_texbuf_attribs(struct panfrost_batch *batch, + uint64_t *buffers, mesa_shader_stage type) { struct panfrost_context *ctx = batch->ctx; struct panfrost_compiled_shader *shader = ctx->prog[type]; @@ -2134,9 +2206,23 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers, 
return 0; } - unsigned attr_count = util_last_bit(ctx->image_mask[type]); - /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D */ - unsigned buf_count = (attr_count * 2) + (PAN_ARCH >= 6 ? 1 : 0); +#if PAN_ARCH >= 6 + /* For Bifrost, we can only output images if they are used in the shader to + * ensure the offset for texel buffers is correct. Therefore, we check the + * mask here and ensure we emit attribs if the shader changes. */ + uint64_t image_mask = ctx->image_mask[type] & shader->info.images_used; + unsigned image_count = util_last_bit(image_mask); + unsigned attr_count = + image_count + BITSET_LAST_BIT(ctx->texture_buffer[type].mask); +#else + uint64_t image_mask = ctx->image_mask[type]; + unsigned image_count = util_last_bit(image_mask); + unsigned attr_count = image_count; +#endif + /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D, so we need to + * use two buffers per image (counted once in attr_count, then once again in + * image_count) */ + unsigned buf_count = attr_count + image_count + (PAN_ARCH >= 6 ? 1 : 0); struct pan_ptr bufs = pan_pool_alloc_desc_array(&batch->pool.base, buf_count, ATTRIBUTE_BUFFER); @@ -2144,13 +2230,17 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers, struct pan_ptr attribs = pan_pool_alloc_desc_array(&batch->pool.base, attr_count, ATTRIBUTE); - emit_image_attribs(ctx, type, attribs.cpu, 0); - emit_image_bufs(batch, type, bufs.cpu); + emit_image_attribs(ctx, type, attribs.cpu, image_mask, 0); + emit_image_bufs(batch, type, bufs.cpu, image_mask); + +#if PAN_ARCH >= 6 + /* Texel buffers come after the images, which require two buffers per image. 
*/ + unsigned image_buf_offset = image_count * 2; + emit_texbuf_attribs(ctx, type, attribs.cpu + image_count, image_buf_offset); + emit_texbuf_bufs(ctx, type, bufs.cpu + image_buf_offset); /* We need an empty attrib buf to stop the prefetching on Bifrost */ -#if PAN_ARCH >= 6 - struct mali_attribute_buffer_packed *attrib_bufs = bufs.cpu; - + struct mali_attribute_buffer_packed *attrib_bufs = bufs.cpu; pan_pack(&attrib_bufs[buf_count - 1], ATTRIBUTE_BUFFER, cfg) ; #endif @@ -2166,16 +2256,28 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers) struct panfrost_vertex_state *so = ctx->vertex; struct panfrost_compiled_shader *vs = ctx->prog[MESA_SHADER_VERTEX]; bool instanced = ctx->instance_count > 1; - uint32_t image_mask = ctx->image_mask[MESA_SHADER_VERTEX]; + + unsigned nr_texbuf; +#if PAN_ARCH >= 6 + /* For Bifrost, we can only output images if they are used in the shader to + * ensure the offset for texel buffers is correct. Therefore, we check the + * mask here and ensure we emit attribs if the shader changes. */ + uint64_t image_mask = + ctx->image_mask[MESA_SHADER_VERTEX] & vs->info.images_used; + nr_texbuf = BITSET_LAST_BIT(ctx->texture_buffer[MESA_SHADER_VERTEX].mask); +#else + uint64_t image_mask = ctx->image_mask[MESA_SHADER_VERTEX]; + nr_texbuf = 0; +#endif unsigned nr_images = util_last_bit(image_mask); /* Worst case: everything is NPOT, which is only possible if instancing * is enabled. Otherwise single record is gauranteed. - * Also, we allocate more memory than what's needed here if either instancing - * is enabled or images are present, this can be improved. */ - unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1; + * Images always use two buffer descriptors. */ + unsigned attrib_bufs = + instanced ? so->nr_bufs * 2 : ALIGN_POT(so->nr_bufs, 2); unsigned nr_bufs = - ((so->nr_bufs + nr_images) * bufs_per_attrib) + (PAN_ARCH >= 6 ? 1 : 0); + attrib_bufs + (nr_images * 2) + nr_texbuf + (PAN_ARCH >= 6 ? 
1 : 0); unsigned count = vs->info.attribute_count; @@ -2326,12 +2428,20 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers) if (nr_images) { k = ALIGN_POT(k, 2); - emit_image_attribs(ctx, MESA_SHADER_VERTEX, out + so->num_elements, k); - emit_image_bufs(batch, MESA_SHADER_VERTEX, bufs + k); - k += (util_last_bit(ctx->image_mask[MESA_SHADER_VERTEX]) * 2); + emit_image_attribs(ctx, MESA_SHADER_VERTEX, out + so->num_elements, + image_mask, k); + emit_image_bufs(batch, MESA_SHADER_VERTEX, bufs + k, image_mask); + k += (nr_images * 2); } #if PAN_ARCH >= 6 + if (nr_texbuf) { + emit_texbuf_attribs(ctx, MESA_SHADER_VERTEX, + out + so->num_elements + nr_images, k); + emit_texbuf_bufs(ctx, MESA_SHADER_VERTEX, bufs + k); + k += nr_texbuf; + } + /* We need an empty attrib buf to stop the prefetching on Bifrost */ pan_pack(&bufs[k], ATTRIBUTE_BUFFER, cfg) ; @@ -3040,12 +3150,20 @@ panfrost_update_shader_state(struct panfrost_batch *batch, batch->rsd[st] = panfrost_emit_frag_shader_meta(batch); } +#if PAN_ARCH >= 6 + /* Bifrost needs to place texel buffers after the image attributes, so we + * need to emit them if textures or the shader is dirty. */ + unsigned attribs_dirty_mask = + PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_TEXTURE | PAN_DIRTY_STAGE_SHADER; +#else + unsigned attribs_dirty_mask = PAN_DIRTY_STAGE_IMAGE; +#endif /* Vertex shaders need to mix vertex data and image descriptors in the * attribute array. This is taken care of in panfrost_update_state_3d(). 
*/ - if (st != MESA_SHADER_VERTEX && (dirty & PAN_DIRTY_STAGE_IMAGE)) { + if (st != MESA_SHADER_VERTEX && (dirty & attribs_dirty_mask)) { batch->attribs[st] = - panfrost_emit_image_attribs(batch, &batch->attrib_bufs[st], st); + panfrost_emit_image_texbuf_attribs(batch, &batch->attrib_bufs[st], st); } #endif } @@ -3086,12 +3204,19 @@ panfrost_update_state_3d(struct panfrost_batch *batch) bool attr_offsetted_by_instance_base = vstate->attr_depends_on_base_instance_mask & BITFIELD_MASK(vs->info.attributes_read_count); +#if PAN_ARCH >= 6 + /* Bifrost needs to place texel buffers after the image attributes, so we + * need to emit them if textures or the shader is dirty. */ + unsigned attribs_dirty_mask = + PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_TEXTURE | PAN_DIRTY_STAGE_SHADER; +#else + unsigned attribs_dirty_mask = PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_SHADER; +#endif /* Vertex data, vertex shader and images accessed by the vertex shader have * an impact on the attributes array, we need to re-emit anytime one of these * parameters changes. 
*/ - if ((dirty & PAN_DIRTY_VERTEX) || - (vt_shader_dirty & (PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_SHADER)) || + if ((dirty & PAN_DIRTY_VERTEX) || (vt_shader_dirty & attribs_dirty_mask) || attr_offsetted_by_instance_base) { batch->attribs[MESA_SHADER_VERTEX] = panfrost_emit_vertex_data( batch, &batch->attrib_bufs[MESA_SHADER_VERTEX]); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index b65d56b1742..62246fc56ee 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -397,12 +397,18 @@ panfrost_set_sampler_views(struct pipe_context *pctx, if (view) new_nr = p + 1; + if (view && view->target == PIPE_BUFFER) + BITSET_SET(ctx->texture_buffer[shader].mask, p); + else + BITSET_CLEAR(ctx->texture_buffer[shader].mask, p); + pipe_sampler_view_reference( (struct pipe_sampler_view **)&ctx->sampler_views[shader][p], view); } for (; i < num_views + unbind_num_trailing_slots; i++) { unsigned p = i + start_slot; + BITSET_CLEAR(ctx->texture_buffer[shader].mask, p); pipe_sampler_view_reference( (struct pipe_sampler_view **)&ctx->sampler_views[shader][p], NULL); } @@ -417,8 +423,14 @@ panfrost_set_sampler_views(struct pipe_context *pctx, * set sampler views */ if (new_nr == 0) { for (i = 0; i < start_slot; ++i) { - if (ctx->sampler_views[shader][i]) + struct pipe_sampler_view *view = + (struct pipe_sampler_view *)ctx->sampler_views[shader][i]; + if (view) new_nr = i + 1; + if (view && view->target == PIPE_BUFFER) + BITSET_SET(ctx->texture_buffer[shader].mask, i); + else + BITSET_CLEAR(ctx->texture_buffer[shader].mask, i); } } diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index e7a558c3690..cd2dc980f5d 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -206,6 +206,9 @@ struct panfrost_context { struct panfrost_sampler_view 
*sampler_views[MESA_SHADER_STAGES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; unsigned sampler_view_count[MESA_SHADER_STAGES]; + struct { + BITSET_DECLARE(mask, PIPE_MAX_SHADER_SAMPLER_VIEWS); + } texture_buffer[MESA_SHADER_STAGES]; struct blitter_context *blitter; diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 9b6fc0f34e9..389ce021cb8 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -559,13 +559,20 @@ panfrost_create_shader_state(struct pipe_context *pctx, so->noperspective_varyings = pan_nir_collect_noperspective_varyings_fs(nir); - /* Vertex shaders get passed images through the vertex attribute descriptor - * array. We need to add an offset to all image intrinsics so they point - * to the right attribute. - */ + unsigned attrib_offset = 0; if (nir->info.stage == MESA_SHADER_VERTEX && dev->arch <= 7) { - NIR_PASS(_, nir, pan_nir_lower_image_index, - util_bitcount64(nir->info.inputs_read)); + /* Vertex shaders get passed images through the vertex attribute + * descriptor array. We need to add an offset to all image intrinsics so + * they point to the right attribute. + */ + attrib_offset += util_bitcount64(nir->info.inputs_read); + NIR_PASS(_, nir, pan_nir_lower_image_index, attrib_offset); + } + if (dev->arch >= 6 && dev->arch <= 7) { + /* Bifrost needs to use attributes to access texel buffers. We place these + * after images, which are also accessed using attributes. 
*/ + attrib_offset += BITSET_LAST_BIT(nir->info.images_used); + NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch_index, attrib_offset); } /* If this shader uses transform feedback, compile the transform diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index a061e479327..28b44bd641b 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -1605,7 +1605,7 @@ static void bi_emit_image_load(bi_builder *b, nir_intrinsic_instr *instr) { enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); - assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) && + assert((dim != GLSL_SAMPLER_DIM_BUF) && "Texel buffers should already have been lowered"); unsigned coord_comps = nir_image_intrinsic_coord_components(instr); bool array = @@ -1657,7 +1657,7 @@ static void bi_emit_lea_image_to(bi_builder *b, bi_index dest, nir_intrinsic_instr *instr) { enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); - assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) && + assert((dim != GLSL_SAMPLER_DIM_BUF) && "Texel buffers should already have been lowered"); bool array = nir_intrinsic_image_array(instr) || dim == GLSL_SAMPLER_DIM_CUBE; @@ -4242,6 +4242,10 @@ enum bifrost_tex_dreg { static void bi_emit_texc(bi_builder *b, nir_tex_instr *instr) { + assert((instr->op != nir_texop_txf || + instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) && + "Texel buffers should already have been lowered"); + struct bifrost_texture_operation desc = { .op = bi_tex_op(instr->op), .offset_or_bias_disable = false, /* TODO */ @@ -6600,10 +6604,8 @@ pan_nir_lower_buf_image_access(nir_shader *shader, unsigned arch) void bifrost_lower_texture_late_nir(nir_shader *nir, unsigned gpu_id) { - if (pan_arch(gpu_id) >= 9) { - NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch, pan_arch(gpu_id)); - NIR_PASS(_, nir, pan_nir_lower_buf_image_access, pan_arch(gpu_id)); - } + NIR_PASS(_, nir, 
pan_nir_lower_texel_buffer_fetch, pan_arch(gpu_id)); + NIR_PASS(_, nir, pan_nir_lower_buf_image_access, pan_arch(gpu_id)); } static int diff --git a/src/panfrost/compiler/pan_compiler.c b/src/panfrost/compiler/pan_compiler.c index 7ce62c5b297..509c9aac681 100644 --- a/src/panfrost/compiler/pan_compiler.c +++ b/src/panfrost/compiler/pan_compiler.c @@ -112,7 +112,8 @@ pan_nir_lower_texture_late(nir_shader *nir, unsigned gpu_id) { /* This must be called after any lowering of resource indices * (panfrost_nir_lower_res_indices / panvk_per_arch(nir_lower_descriptors)) - */ + * and lowering of attribute indices (pan_nir_lower_image_index / + * pan_nir_lower_texel_buffer_fetch_index) */ if (pan_arch(gpu_id) >= 6) bifrost_lower_texture_late_nir(nir, gpu_id); } @@ -281,7 +282,11 @@ pan_shader_update_info(struct pan_shader_info *info, nir_shader *s, } info->outputs_written = s->info.outputs_written; + info->images_used = + s->info.images_used[0] | ((uint64_t)s->info.images_used[1]) << 32; info->attribute_count += BITSET_LAST_BIT(s->info.images_used); + if (arch >= 6 && arch < 9) + info->attribute_count += BITSET_LAST_BIT(s->info.texture_buffers); info->writes_global = s->info.writes_memory; info->ubo_count = s->info.num_ubos; diff --git a/src/panfrost/compiler/pan_compiler.h b/src/panfrost/compiler/pan_compiler.h index 1c4c789a7c1..1469c5a6b77 100644 --- a/src/panfrost/compiler/pan_compiler.h +++ b/src/panfrost/compiler/pan_compiler.h @@ -332,6 +332,7 @@ struct pan_shader_info { unsigned attributes_read_count; unsigned attribute_count; unsigned attributes_read; + uint64_t images_used; struct { unsigned input_count; diff --git a/src/panfrost/lib/pan_desc.h b/src/panfrost/lib/pan_desc.h index af067b0cad5..4f09d0759b3 100644 --- a/src/panfrost/lib/pan_desc.h +++ b/src/panfrost/lib/pan_desc.h @@ -42,6 +42,7 @@ struct pan_buffer_view { } astc; unsigned width_el; uint64_t base; + uint32_t offset; }; struct pan_compute_dim { diff --git a/src/panfrost/lib/pan_texture.c 
b/src/panfrost/lib/pan_texture.c index a3a5f850f58..62f7fd9e6dc 100644 --- a/src/panfrost/lib/pan_texture.c +++ b/src/panfrost/lib/pan_texture.c @@ -1350,6 +1350,30 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, } } +#elif PAN_ARCH >= 6 + +void +GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, + struct mali_attribute_buffer_packed *out_buf, + struct mali_attribute_packed *out_attrib) +{ + unsigned stride = util_format_get_blocksize(bview->format); + uint32_t hw_fmt = GENX(pan_format_from_pipe_format)(bview->format)->hw; + + pan_pack(out_buf, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.pointer = bview->base; + cfg.stride = stride; + cfg.size = bview->width_el * stride; + } + + pan_pack(out_attrib, ATTRIBUTE, cfg) { + cfg.format = hw_fmt; + cfg.offset = bview->offset; + cfg.offset_enable = bview->offset != 0; + } +} + #else void @@ -1377,12 +1401,6 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, cfg.texel_ordering = MALI_TEXTURE_LAYOUT_LINEAR; cfg.levels = 1; cfg.array_size = 1; - -#if PAN_ARCH >= 6 - cfg.surfaces = payload->gpu; - cfg.minimum_lod = 0; - cfg.maximum_lod = 0; -#endif } } diff --git a/src/panfrost/lib/pan_texture.h b/src/panfrost/lib/pan_texture.h index 6c0fb6291b6..9e5b2e10f58 100644 --- a/src/panfrost/lib/pan_texture.h +++ b/src/panfrost/lib/pan_texture.h @@ -96,6 +96,10 @@ void GENX(pan_tex_emit_afrc_payload_entry)( #if PAN_ARCH >= 9 void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, struct mali_buffer_packed *out); +#elif PAN_ARCH >= 6 +void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, + struct mali_attribute_buffer_packed *out_buf, + struct mali_attribute_packed *out_attrib); #else void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, struct mali_texture_packed *out, diff --git a/src/panfrost/vulkan/panvk_buffer_view.h b/src/panfrost/vulkan/panvk_buffer_view.h index 3bce7d4d865..4b456f92caf 100644 --- 
a/src/panfrost/vulkan/panvk_buffer_view.h +++ b/src/panfrost/vulkan/panvk_buffer_view.h @@ -21,18 +21,12 @@ struct panvk_buffer_view { struct vk_buffer_view vk; -#if PAN_ARCH < 9 - struct panvk_priv_mem mem; -#endif - struct { #if PAN_ARCH >= 9 struct mali_buffer_packed buf; #else - /* TODO: move Bifrost over to using BufferDescriptor as well. */ - struct mali_texture_packed tex; - - struct mali_attribute_buffer_packed img_attrib_buf[2]; + struct mali_attribute_buffer_packed attrib_buf; + struct mali_attribute_packed attrib; #endif } descs; }; diff --git a/src/panfrost/vulkan/panvk_vX_buffer_view.c b/src/panfrost/vulkan/panvk_vX_buffer_view.c index fa78e261198..64b13d0c138 100644 --- a/src/panfrost/vulkan/panvk_vX_buffer_view.c +++ b/src/panfrost/vulkan/panvk_vX_buffer_view.c @@ -44,75 +44,36 @@ panvk_per_arch(CreateBufferView)(VkDevice _device, enum pipe_format pfmt = vk_format_to_pipe_format(view->vk.format); - uint64_t address = panvk_buffer_gpu_ptr(buffer, pCreateInfo->offset); - VkBufferUsageFlags tex_usage_mask = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; - -#if PAN_ARCH >= 9 - tex_usage_mask |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; -#else - /* This alignment constraint only applies when TextureDescriptors are used. 
*/ - assert(!(address & 63)); -#endif + VkBufferUsageFlags tex_usage_mask = + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; if (buffer->vk.usage & tex_usage_mask) { +#if PAN_ARCH >= 9 struct pan_buffer_view bview = { .format = pfmt, - .astc.hdr = util_format_is_astc_hdr(pfmt), .width_el = view->vk.elements, - .base = address, + .base = panvk_buffer_gpu_ptr(buffer, pCreateInfo->offset), }; -#if PAN_ARCH >= 9 GENX(pan_buffer_texture_emit)(&bview, &view->descs.buf); #else - view->mem = - panvk_pool_alloc_desc(&device->mempools.rw, SURFACE_WITH_STRIDE); - if (!panvk_priv_mem_check_alloc(view->mem)) - return panvk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + /* Bifrost requires the base address to be 64 byte aligned and passes the + * remaing offset through the Attribute Descriptor. */ + uint64_t aligned_offset = pCreateInfo->offset & ~0x3f; + uint32_t remainder_offset = pCreateInfo->offset & 0x3f; + struct pan_buffer_view bview = { + .format = pfmt, + .width_el = view->vk.elements, + .base = panvk_buffer_gpu_ptr(buffer, aligned_offset), + .offset = remainder_offset, + }; - panvk_priv_mem_write(view->mem, 0, struct mali_surface_with_stride_packed, sd) { - struct pan_ptr ptr = { - .gpu = panvk_priv_mem_dev_addr(view->mem), - .cpu = sd, - }; - - GENX(pan_buffer_texture_emit)(&bview, &view->descs.tex, &ptr); - } + GENX(pan_buffer_texture_emit)(&bview, &view->descs.attrib_buf, + &view->descs.attrib); #endif } -#if PAN_ARCH < 9 - if (buffer->vk.usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { - unsigned blksz = vk_format_get_blocksize(pCreateInfo->format); - - pan_pack(&view->descs.img_attrib_buf[0], ATTRIBUTE_BUFFER, cfg) { - /* The format is the only thing we lack to emit attribute descriptors - * when copying from the set to the attribute tables. Instead of - * making the descriptor size to store an extra format, we pack - * the 22-bit format with the texel stride, which is expected to be - * fit in remaining 10 bits. 
- */ - uint32_t hw_fmt = GENX(pan_format_from_pipe_format)(pfmt)->hw; - - assert(blksz < BITFIELD_MASK(10)); - assert(hw_fmt < BITFIELD_MASK(22)); - - cfg.type = MALI_ATTRIBUTE_TYPE_3D_LINEAR; - cfg.pointer = address; - cfg.stride = blksz | (hw_fmt << 10); - cfg.size = view->vk.elements * blksz; - } - - struct mali_attribute_buffer_packed *buf = &view->descs.img_attrib_buf[1]; - pan_cast_and_pack(buf, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { - cfg.s_dimension = view->vk.elements; - cfg.t_dimension = 1; - cfg.r_dimension = 1; - cfg.row_stride = view->vk.elements * blksz; - } - } -#endif - *pView = panvk_buffer_view_to_handle(view); return VK_SUCCESS; } @@ -127,9 +88,5 @@ panvk_per_arch(DestroyBufferView)(VkDevice _device, VkBufferView bufferView, if (!view) return; -#if PAN_ARCH < 9 - panvk_pool_free_mem(&view->mem); -#endif - vk_buffer_view_destroy(&device->vk, pAllocator, &view->vk); } diff --git a/src/panfrost/vulkan/panvk_vX_descriptor_set.c b/src/panfrost/vulkan/panvk_vX_descriptor_set.c index 37b0b812603..2905692fcc3 100644 --- a/src/panfrost/vulkan/panvk_vX_descriptor_set.c +++ b/src/panfrost/vulkan/panvk_vX_descriptor_set.c @@ -252,10 +252,16 @@ write_buffer_view_desc(struct panvk_descriptor_set *set, VK_FROM_HANDLE(panvk_buffer_view, view, bufferView); #if PAN_ARCH < 9 - if (type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) - write_desc(set, binding, elem, &view->descs.img_attrib_buf, NO_SUBDESC); - else - write_desc(set, binding, elem, &view->descs.tex, NO_SUBDESC); + struct { + struct mali_attribute_buffer_packed attr_buf_desc; + struct mali_attribute_packed attr_desc; + uint32_t pad[2]; + } padded_desc = { + .attr_buf_desc = view->descs.attrib_buf, + .attr_desc = view->descs.attrib, + }; + + write_desc(set, binding, elem, &padded_desc, NO_SUBDESC); #else write_desc(set, binding, elem, &view->descs.buf, NO_SUBDESC); #endif diff --git a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c index 
78cdab02282..d7044fbdb07 100644 --- a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c +++ b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c @@ -130,13 +130,13 @@ desc_type_to_table_type( return sampler_subdesc ? PANVK_BIFROST_DESC_TABLE_SAMPLER : PANVK_BIFROST_DESC_TABLE_TEXTURE; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: return PANVK_BIFROST_DESC_TABLE_TEXTURE; case VK_DESCRIPTOR_TYPE_SAMPLER: return PANVK_BIFROST_DESC_TABLE_SAMPLER; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: return PANVK_BIFROST_DESC_TABLE_IMG; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: @@ -598,12 +598,10 @@ load_tex_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim, b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16, 1, 32, ctx); loaded_size = nir_idiv(b, size, stride); #else - nir_def *tex_w = load_resource_deref_desc( - b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 1, 16, ctx); - - /* S dimension is 16 bits wide. We don't support combining S,T dimensions - * to allow large buffers yet. */ - loaded_size = nir_iadd_imm(b, nir_u2u32(b, tex_w), 1); + nir_def *stride_size = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 8, 2, 32, ctx); + loaded_size = nir_idiv(b, nir_channel(b, stride_size, 1), + nir_channel(b, stride_size, 0)); #endif } else { nir_def *tex_w_h = load_resource_deref_desc( @@ -644,12 +642,10 @@ load_img_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim, return load_tex_size(b, deref, dim, is_array, ctx); if (dim == GLSL_SAMPLER_DIM_BUF) { - nir_def *tex_w = load_resource_deref_desc( - b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 1, 16, ctx); - - /* S dimension is 16 bits wide. We don't support combining S,T dimensions - * to allow large buffers yet. 
*/ - return nir_iadd_imm(b, nir_u2u32(b, tex_w), 1); + nir_def *stride_size = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 8, 2, 32, ctx); + return nir_idiv(b, nir_channel(b, stride_size, 1), + nir_channel(b, stride_size, 0)); } else { nir_def *tex_sz = load_resource_deref_desc( b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 3, 16, ctx); diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 67b42759086..b8b4522e4e5 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -894,7 +894,6 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir, struct pan_compile_inputs input = *compile_input; pan_postprocess_nir(nir, input.gpu_id); - pan_nir_lower_texture_late(nir, input.gpu_id); if (nir->info.stage == MESA_SHADER_VERTEX) NIR_PASS(_, nir, nir_shader_intrinsics_pass, panvk_lower_load_vs_input, @@ -906,8 +905,11 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir, /* since valhall, panvk_per_arch(nir_lower_descriptors) separates the * driver set and the user sets, and does not need pan_nir_lower_image_index */ - if (PAN_ARCH < 9 && nir->info.stage == MESA_SHADER_VERTEX) + if (PAN_ARCH < 9 && nir->info.stage == MESA_SHADER_VERTEX) { NIR_PASS(_, nir, pan_nir_lower_image_index, MAX_VS_ATTRIBS); + NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch_index, MAX_VS_ATTRIBS); + } + pan_nir_lower_texture_late(nir, input.gpu_id); if (noperspective_varyings && nir->info.stage == MESA_SHADER_VERTEX) { NIR_PASS(_, nir, nir_inline_sysval,