pan/bi: Make texel buffers use Attribute Buffers

Texel buffers are currently described by a TextureDescriptor, which
leads to restrictive limits on size and alignment. These limits can be
avoided by using AttributeDescriptors + AttributeBufferDescriptors
instead.

This requires us to access texel buffers using attributes rather than
textures, which involves setting up AttributeDescriptors and
AttributeBufferDescriptors in their respective allocations, rather than
the previous TextureDescriptors in the texture allocation.

This is already done for images, so we simply place the texel buffer
attributes after the images and ensure the indexing is offset correctly.

Accessing a texel buffer thus becomes:
1. Get the buffer address and ConversionDescriptor with LEA_ATTR[_IMM]
2. Use LD_CVT to get the value

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38490>
This commit is contained in:
Lars-Ivar Hesselberg Simonsen 2025-11-11 12:35:49 +01:00
parent 396ad57630
commit a21ee564e2
15 changed files with 281 additions and 148 deletions

View file

@ -88,8 +88,10 @@ struct panfrost_sampler_view {
struct mali_texture_packed bifrost_tex_descriptor;
};
#else
/* TODO: move Bifrost over to using BufferDescriptor as well. */
struct mali_texture_packed bifrost_tex_descriptor;
union {
uint64_t texel_buffer_base_ptr;
struct mali_texture_packed bifrost_tex_descriptor;
};
#endif
uint64_t texture_bo;
uint64_t texture_size;
@ -1709,6 +1711,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
panfrost_translate_texture_dimension(so->base.target);
if (so->base.target == PIPE_BUFFER) {
#if PAN_ARCH >= 9
struct pan_buffer_view bview = {
.format = format,
.width_el =
@ -1717,11 +1720,21 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
.base = prsrc->plane.base + so->base.u.buf.offset,
};
#if PAN_ARCH >= 9
void *tex = &so->bifrost_buf_descriptor;
GENX(pan_buffer_texture_emit)(&bview, tex);
return;
#elif PAN_ARCH >= 6
/* For Bifrost, we'll generate the Attribute Buffer Descriptor when
* setting up attributes, so only store the base pointer at this point. */
so->texel_buffer_base_ptr = prsrc->plane.base;
#else
struct pan_buffer_view bview = {
.format = format,
.width_el =
MIN2(so->base.u.buf.size / util_format_get_blocksize(format),
pan_get_max_texel_buffer_elements(PAN_ARCH)),
.base = prsrc->plane.base + so->base.u.buf.offset,
};
const struct util_format_description *desc =
util_format_description(format);
if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
@ -1730,11 +1743,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
bview.astc.hdr = util_format_is_astc_hdr(format);
}
#if PAN_ARCH >= 6
unsigned payload_size = pan_size(SURFACE_WITH_STRIDE);
#else
unsigned payload_size = pan_size(TEXTURE) + pan_size(SURFACE_WITH_STRIDE);
#endif
struct panfrost_pool *pool = so->pool ?: &ctx->descs;
struct pan_ptr payload =
@ -1747,16 +1756,13 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so,
so->state = panfrost_pool_take_ref(pool, payload.gpu);
void *tex = (PAN_ARCH >= 6) ? &so->bifrost_tex_descriptor : payload.cpu;
if (PAN_ARCH <= 5) {
payload.cpu += pan_size(TEXTURE);
payload.gpu += pan_size(TEXTURE);
}
void *tex = payload.cpu;
payload.cpu += pan_size(TEXTURE);
payload.gpu += pan_size(TEXTURE);
GENX(pan_buffer_texture_emit)(&bview, tex, &payload);
return;
#endif
return;
}
unsigned first_level = so->base.u.tex.first_level;
@ -1913,7 +1919,16 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
struct panfrost_resource *rsrc = pan_resource(pview->texture);
panfrost_update_sampler_view(view, &ctx->base);
#if PAN_ARCH < 9
if (pview->target == PIPE_BUFFER)
/* Texel buffers will be emitted as attributes */
panfrost_emit_null_texture(&out[i]);
else
out[i] = view->bifrost_tex_descriptor;
#else
out[i] = view->bifrost_tex_descriptor;
#endif
panfrost_batch_read_rsrc(batch, rsrc, stage);
panfrost_batch_add_bo(batch, view->state.bo, stage);
@ -1991,10 +2006,10 @@ panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
*/
static void
emit_image_attribs(struct panfrost_context *ctx, mesa_shader_stage shader,
struct mali_attribute_packed *attribs,
struct mali_attribute_packed *attribs, uint64_t image_mask,
unsigned first_image_buf_index)
{
unsigned last_bit = util_last_bit(ctx->image_mask[shader]);
unsigned last_bit = util_last_bit(image_mask);
for (unsigned i = 0; i < last_bit; ++i) {
enum pipe_format format = ctx->images[shader][i].format;
@ -2008,6 +2023,27 @@ emit_image_attribs(struct panfrost_context *ctx, mesa_shader_stage shader,
}
}
#if PAN_ARCH >= 6
/* Emit one AttributeDescriptor per bound texel buffer for the given shader
 * stage. Each descriptor references an AttributeBufferDescriptor slot,
 * numbered from first_texel_buf_index (texel buffers are placed after the
 * image attribute buffers). offset_enable is false because the buffer
 * pointer already folds in the view offset (see emit_texbuf_bufs). */
static void
emit_texbuf_attribs(struct panfrost_context *ctx, mesa_shader_stage shader,
struct mali_attribute_packed *attribs,
unsigned first_texel_buf_index)
{
unsigned i;
/* Walk only the sampler-view slots flagged as texel buffers. */
BITSET_FOREACH_SET(i, ctx->texture_buffer[shader].mask,
PIPE_MAX_SHADER_SAMPLER_VIEWS) {
struct panfrost_sampler_view *view = ctx->sampler_views[shader][i];
/* The mask is maintained in panfrost_set_sampler_views(), so a set bit
 * implies a non-NULL view. */
assert(view);
enum pipe_format format = view->base.format;
pan_pack(attribs + i, ATTRIBUTE, cfg) {
cfg.buffer_index = first_texel_buf_index + i;
cfg.offset_enable = false;
cfg.format = GENX(pan_format_from_pipe_format)(format)->hw;
}
}
}
#endif
static enum mali_attribute_type
pan_modifier_to_attr_type(uint64_t modifier)
{
@ -2023,10 +2059,10 @@ pan_modifier_to_attr_type(uint64_t modifier)
static void
emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader,
struct mali_attribute_buffer_packed *bufs)
struct mali_attribute_buffer_packed *bufs, uint64_t image_mask)
{
struct panfrost_context *ctx = batch->ctx;
unsigned last_bit = util_last_bit(ctx->image_mask[shader]);
unsigned last_bit = util_last_bit(image_mask);
for (unsigned i = 0; i < last_bit; ++i) {
struct pipe_image_view *image = &ctx->images[shader][i];
@ -2065,6 +2101,18 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader,
panfrost_track_image_access(batch, shader, image);
#if PAN_ARCH >= 6
if (is_buffer) {
pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) {
cfg.type = MALI_ATTRIBUTE_TYPE_1D;
cfg.pointer = rsrc->plane.base + offset;
cfg.stride = util_format_get_blocksize(image->format);
cfg.size = rsrc->base.width0;
}
continue;
}
#endif
pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) {
cfg.type = pan_modifier_to_attr_type(rsrc->image.props.modifier);
cfg.pointer = rsrc->plane.base + offset;
@ -2074,6 +2122,7 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader,
is_buffer ? 0 : image->u.tex.level);
}
#if PAN_ARCH <= 5
if (is_buffer) {
pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D,
cfg) {
@ -2084,6 +2133,7 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader,
continue;
}
#endif
pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D,
cfg) {
@ -2122,9 +2172,31 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader,
}
}
#if PAN_ARCH >= 6
/* Emit one 1D AttributeBufferDescriptor per bound texel buffer for the
 * given shader stage. The base GPU address was stashed in
 * texel_buffer_base_ptr by panfrost_create_sampler_view_bo(); the view's
 * buffer offset is folded into the pointer here, and the stride is the
 * texel block size so element i sits at pointer + i * stride. */
static void
emit_texbuf_bufs(struct panfrost_context *ctx, mesa_shader_stage shader,
struct mali_attribute_buffer_packed *bufs)
{
unsigned i;
/* Walk only the sampler-view slots flagged as texel buffers. */
BITSET_FOREACH_SET(i, ctx->texture_buffer[shader].mask,
PIPE_MAX_SHADER_SAMPLER_VIEWS) {
struct panfrost_sampler_view *view = ctx->sampler_views[shader][i];
/* Set mask bit implies a bound view (see panfrost_set_sampler_views). */
assert(view);
struct pipe_sampler_view *pview = &view->base;
pan_pack(bufs + i, ATTRIBUTE_BUFFER, cfg) {
cfg.type = MALI_ATTRIBUTE_TYPE_1D;
cfg.pointer = view->texel_buffer_base_ptr + pview->u.buf.offset;
cfg.stride = util_format_get_blocksize(pview->format);
cfg.size = pview->u.buf.size;
}
}
}
#endif
static uint64_t
panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers,
mesa_shader_stage type)
panfrost_emit_image_texbuf_attribs(struct panfrost_batch *batch,
uint64_t *buffers, mesa_shader_stage type)
{
struct panfrost_context *ctx = batch->ctx;
struct panfrost_compiled_shader *shader = ctx->prog[type];
@ -2134,9 +2206,23 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers,
return 0;
}
unsigned attr_count = util_last_bit(ctx->image_mask[type]);
/* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D */
unsigned buf_count = (attr_count * 2) + (PAN_ARCH >= 6 ? 1 : 0);
#if PAN_ARCH >= 6
/* For Bifrost, we can only output images if they are used in the shader to
* ensure the offset for texel buffers is correct. Therefore, we check the
* mask here and ensure we emit attribs if the shader changes. */
uint64_t image_mask = ctx->image_mask[type] & shader->info.images_used;
unsigned image_count = util_last_bit(image_mask);
unsigned attr_count =
image_count + BITSET_LAST_BIT(ctx->texture_buffer[type].mask);
#else
uint64_t image_mask = ctx->image_mask[type];
unsigned image_count = util_last_bit(image_mask);
unsigned attr_count = image_count;
#endif
/* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D, so we need to
* use two buffers per image (counted once in attr_count, then once again in
* image_count) */
unsigned buf_count = attr_count + image_count + (PAN_ARCH >= 6 ? 1 : 0);
struct pan_ptr bufs =
pan_pool_alloc_desc_array(&batch->pool.base, buf_count, ATTRIBUTE_BUFFER);
@ -2144,13 +2230,17 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers,
struct pan_ptr attribs =
pan_pool_alloc_desc_array(&batch->pool.base, attr_count, ATTRIBUTE);
emit_image_attribs(ctx, type, attribs.cpu, 0);
emit_image_bufs(batch, type, bufs.cpu);
emit_image_attribs(ctx, type, attribs.cpu, image_mask, 0);
emit_image_bufs(batch, type, bufs.cpu, image_mask);
#if PAN_ARCH >= 6
/* Texel buffers come after the images, which require two buffers per image. */
unsigned image_buf_offset = image_count * 2;
emit_texbuf_attribs(ctx, type, attribs.cpu + image_count, image_buf_offset);
emit_texbuf_bufs(ctx, type, bufs.cpu + image_buf_offset);
/* We need an empty attrib buf to stop the prefetching on Bifrost */
#if PAN_ARCH >= 6
struct mali_attribute_buffer_packed *attrib_bufs = bufs.cpu;
struct mali_attribute_buffer_packed *attrib_bufs = bufs.cpu;
pan_pack(&attrib_bufs[buf_count - 1], ATTRIBUTE_BUFFER, cfg)
;
#endif
@ -2166,16 +2256,28 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers)
struct panfrost_vertex_state *so = ctx->vertex;
struct panfrost_compiled_shader *vs = ctx->prog[MESA_SHADER_VERTEX];
bool instanced = ctx->instance_count > 1;
uint32_t image_mask = ctx->image_mask[MESA_SHADER_VERTEX];
unsigned nr_texbuf;
#if PAN_ARCH >= 6
/* For Bifrost, we can only output images if they are used in the shader to
* ensure the offset for texel buffers is correct. Therefore, we check the
* mask here and ensure we emit attribs if the shader changes. */
uint64_t image_mask =
ctx->image_mask[MESA_SHADER_VERTEX] & vs->info.images_used;
nr_texbuf = BITSET_LAST_BIT(ctx->texture_buffer[MESA_SHADER_VERTEX].mask);
#else
uint64_t image_mask = ctx->image_mask[MESA_SHADER_VERTEX];
nr_texbuf = 0;
#endif
unsigned nr_images = util_last_bit(image_mask);
/* Worst case: everything is NPOT, which is only possible if instancing
* is enabled. Otherwise single record is guaranteed.
* Also, we allocate more memory than what's needed here if either instancing
* is enabled or images are present, this can be improved. */
unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1;
* Images always use two buffer descriptors. */
unsigned attrib_bufs =
instanced ? so->nr_bufs * 2 : ALIGN_POT(so->nr_bufs, 2);
unsigned nr_bufs =
((so->nr_bufs + nr_images) * bufs_per_attrib) + (PAN_ARCH >= 6 ? 1 : 0);
attrib_bufs + (nr_images * 2) + nr_texbuf + (PAN_ARCH >= 6 ? 1 : 0);
unsigned count = vs->info.attribute_count;
@ -2326,12 +2428,20 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers)
if (nr_images) {
k = ALIGN_POT(k, 2);
emit_image_attribs(ctx, MESA_SHADER_VERTEX, out + so->num_elements, k);
emit_image_bufs(batch, MESA_SHADER_VERTEX, bufs + k);
k += (util_last_bit(ctx->image_mask[MESA_SHADER_VERTEX]) * 2);
emit_image_attribs(ctx, MESA_SHADER_VERTEX, out + so->num_elements,
image_mask, k);
emit_image_bufs(batch, MESA_SHADER_VERTEX, bufs + k, image_mask);
k += (nr_images * 2);
}
#if PAN_ARCH >= 6
if (nr_texbuf) {
emit_texbuf_attribs(ctx, MESA_SHADER_VERTEX,
out + so->num_elements + nr_images, k);
emit_texbuf_bufs(ctx, MESA_SHADER_VERTEX, bufs + k);
k += nr_texbuf;
}
/* We need an empty attrib buf to stop the prefetching on Bifrost */
pan_pack(&bufs[k], ATTRIBUTE_BUFFER, cfg)
;
@ -3040,12 +3150,20 @@ panfrost_update_shader_state(struct panfrost_batch *batch,
batch->rsd[st] = panfrost_emit_frag_shader_meta(batch);
}
#if PAN_ARCH >= 6
/* Bifrost needs to place texel buffers after the image attributes, so we
* need to emit them if textures or the shader is dirty. */
unsigned attribs_dirty_mask =
PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_TEXTURE | PAN_DIRTY_STAGE_SHADER;
#else
unsigned attribs_dirty_mask = PAN_DIRTY_STAGE_IMAGE;
#endif
/* Vertex shaders need to mix vertex data and image descriptors in the
* attribute array. This is taken care of in panfrost_update_state_3d().
*/
if (st != MESA_SHADER_VERTEX && (dirty & PAN_DIRTY_STAGE_IMAGE)) {
if (st != MESA_SHADER_VERTEX && (dirty & attribs_dirty_mask)) {
batch->attribs[st] =
panfrost_emit_image_attribs(batch, &batch->attrib_bufs[st], st);
panfrost_emit_image_texbuf_attribs(batch, &batch->attrib_bufs[st], st);
}
#endif
}
@ -3086,12 +3204,19 @@ panfrost_update_state_3d(struct panfrost_batch *batch)
bool attr_offsetted_by_instance_base =
vstate->attr_depends_on_base_instance_mask &
BITFIELD_MASK(vs->info.attributes_read_count);
#if PAN_ARCH >= 6
/* Bifrost needs to place texel buffers after the image attributes, so we
* need to emit them if textures or the shader is dirty. */
unsigned attribs_dirty_mask =
PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_TEXTURE | PAN_DIRTY_STAGE_SHADER;
#else
unsigned attribs_dirty_mask = PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_SHADER;
#endif
/* Vertex data, vertex shader and images accessed by the vertex shader have
* an impact on the attributes array, we need to re-emit anytime one of these
* parameters changes. */
if ((dirty & PAN_DIRTY_VERTEX) ||
(vt_shader_dirty & (PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_SHADER)) ||
if ((dirty & PAN_DIRTY_VERTEX) || (vt_shader_dirty & attribs_dirty_mask) ||
attr_offsetted_by_instance_base) {
batch->attribs[MESA_SHADER_VERTEX] = panfrost_emit_vertex_data(
batch, &batch->attrib_bufs[MESA_SHADER_VERTEX]);

View file

@ -397,12 +397,18 @@ panfrost_set_sampler_views(struct pipe_context *pctx,
if (view)
new_nr = p + 1;
if (view && view->target == PIPE_BUFFER)
BITSET_SET(ctx->texture_buffer[shader].mask, p);
else
BITSET_CLEAR(ctx->texture_buffer[shader].mask, p);
pipe_sampler_view_reference(
(struct pipe_sampler_view **)&ctx->sampler_views[shader][p], view);
}
for (; i < num_views + unbind_num_trailing_slots; i++) {
unsigned p = i + start_slot;
BITSET_CLEAR(ctx->texture_buffer[shader].mask, p);
pipe_sampler_view_reference(
(struct pipe_sampler_view **)&ctx->sampler_views[shader][p], NULL);
}
@ -417,8 +423,14 @@ panfrost_set_sampler_views(struct pipe_context *pctx,
* set sampler views */
if (new_nr == 0) {
for (i = 0; i < start_slot; ++i) {
if (ctx->sampler_views[shader][i])
struct pipe_sampler_view *view =
(struct pipe_sampler_view *)ctx->sampler_views[shader][i];
if (view)
new_nr = i + 1;
if (view && view->target == PIPE_BUFFER)
BITSET_SET(ctx->texture_buffer[shader].mask, i);
else
BITSET_CLEAR(ctx->texture_buffer[shader].mask, i);
}
}

View file

@ -206,6 +206,9 @@ struct panfrost_context {
struct panfrost_sampler_view
*sampler_views[MESA_SHADER_STAGES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned sampler_view_count[MESA_SHADER_STAGES];
struct {
BITSET_DECLARE(mask, PIPE_MAX_SHADER_SAMPLER_VIEWS);
} texture_buffer[MESA_SHADER_STAGES];
struct blitter_context *blitter;

View file

@ -559,13 +559,20 @@ panfrost_create_shader_state(struct pipe_context *pctx,
so->noperspective_varyings =
pan_nir_collect_noperspective_varyings_fs(nir);
/* Vertex shaders get passed images through the vertex attribute descriptor
* array. We need to add an offset to all image intrinsics so they point
* to the right attribute.
*/
unsigned attrib_offset = 0;
if (nir->info.stage == MESA_SHADER_VERTEX && dev->arch <= 7) {
NIR_PASS(_, nir, pan_nir_lower_image_index,
util_bitcount64(nir->info.inputs_read));
/* Vertex shaders get passed images through the vertex attribute
* descriptor array. We need to add an offset to all image intrinsics so
* they point to the right attribute.
*/
attrib_offset += util_bitcount64(nir->info.inputs_read);
NIR_PASS(_, nir, pan_nir_lower_image_index, attrib_offset);
}
if (dev->arch >= 6 && dev->arch <= 7) {
/* Bifrost needs to use attributes to access texel buffers. We place these
* after images, which are also accessed using attributes. */
attrib_offset += BITSET_LAST_BIT(nir->info.images_used);
NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch_index, attrib_offset);
}
/* If this shader uses transform feedback, compile the transform

View file

@ -1605,7 +1605,7 @@ static void
bi_emit_image_load(bi_builder *b, nir_intrinsic_instr *instr)
{
enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) &&
assert((dim != GLSL_SAMPLER_DIM_BUF) &&
"Texel buffers should already have been lowered");
unsigned coord_comps = nir_image_intrinsic_coord_components(instr);
bool array =
@ -1657,7 +1657,7 @@ static void
bi_emit_lea_image_to(bi_builder *b, bi_index dest, nir_intrinsic_instr *instr)
{
enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) &&
assert((dim != GLSL_SAMPLER_DIM_BUF) &&
"Texel buffers should already have been lowered");
bool array =
nir_intrinsic_image_array(instr) || dim == GLSL_SAMPLER_DIM_CUBE;
@ -4242,6 +4242,10 @@ enum bifrost_tex_dreg {
static void
bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
{
assert((instr->op != nir_texop_txf ||
instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) &&
"Texel buffers should already have been lowered");
struct bifrost_texture_operation desc = {
.op = bi_tex_op(instr->op),
.offset_or_bias_disable = false, /* TODO */
@ -6600,10 +6604,8 @@ pan_nir_lower_buf_image_access(nir_shader *shader, unsigned arch)
void
bifrost_lower_texture_late_nir(nir_shader *nir, unsigned gpu_id)
{
if (pan_arch(gpu_id) >= 9) {
NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch, pan_arch(gpu_id));
NIR_PASS(_, nir, pan_nir_lower_buf_image_access, pan_arch(gpu_id));
}
NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch, pan_arch(gpu_id));
NIR_PASS(_, nir, pan_nir_lower_buf_image_access, pan_arch(gpu_id));
}
static int

View file

@ -112,7 +112,8 @@ pan_nir_lower_texture_late(nir_shader *nir, unsigned gpu_id)
{
/* This must be called after any lowering of resource indices
* (panfrost_nir_lower_res_indices / panvk_per_arch(nir_lower_descriptors))
*/
* and lowering of attribute indices (pan_nir_lower_image_index /
* pan_nir_lower_texel_buffer_fetch_index) */
if (pan_arch(gpu_id) >= 6)
bifrost_lower_texture_late_nir(nir, gpu_id);
}
@ -281,7 +282,11 @@ pan_shader_update_info(struct pan_shader_info *info, nir_shader *s,
}
info->outputs_written = s->info.outputs_written;
info->images_used =
s->info.images_used[0] | ((uint64_t)s->info.images_used[1]) << 32;
info->attribute_count += BITSET_LAST_BIT(s->info.images_used);
if (arch >= 6 && arch < 9)
info->attribute_count += BITSET_LAST_BIT(s->info.texture_buffers);
info->writes_global = s->info.writes_memory;
info->ubo_count = s->info.num_ubos;

View file

@ -332,6 +332,7 @@ struct pan_shader_info {
unsigned attributes_read_count;
unsigned attribute_count;
unsigned attributes_read;
uint64_t images_used;
struct {
unsigned input_count;

View file

@ -42,6 +42,7 @@ struct pan_buffer_view {
} astc;
unsigned width_el;
uint64_t base;
uint32_t offset;
};
struct pan_compute_dim {

View file

@ -1350,6 +1350,30 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
}
}
#elif PAN_ARCH >= 6
/* Bifrost (v6-v7) texel-buffer path: describe the buffer as a 1D
 * AttributeBufferDescriptor plus an AttributeDescriptor instead of a
 * TextureDescriptor, avoiding the latter's size/alignment limits.
 *
 * out_buf receives the 1D buffer (pointer/stride/size in bytes); out_attrib
 * receives the format and a byte offset. Callers may pass the sub-alignment
 * remainder of the buffer offset in bview->offset (see the panvk
 * CreateBufferView path, which 64-byte-aligns the base); offset_enable is
 * only set when that remainder is non-zero. */
void
GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
struct mali_attribute_buffer_packed *out_buf,
struct mali_attribute_packed *out_attrib)
{
/* Stride is the texel block size; total size covers width_el texels. */
unsigned stride = util_format_get_blocksize(bview->format);
uint32_t hw_fmt = GENX(pan_format_from_pipe_format)(bview->format)->hw;
pan_pack(out_buf, ATTRIBUTE_BUFFER, cfg) {
cfg.type = MALI_ATTRIBUTE_TYPE_1D;
cfg.pointer = bview->base;
cfg.stride = stride;
cfg.size = bview->width_el * stride;
}
pan_pack(out_attrib, ATTRIBUTE, cfg) {
cfg.format = hw_fmt;
cfg.offset = bview->offset;
cfg.offset_enable = bview->offset != 0;
}
}
#else
void
@ -1377,12 +1401,6 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
cfg.texel_ordering = MALI_TEXTURE_LAYOUT_LINEAR;
cfg.levels = 1;
cfg.array_size = 1;
#if PAN_ARCH >= 6
cfg.surfaces = payload->gpu;
cfg.minimum_lod = 0;
cfg.maximum_lod = 0;
#endif
}
}

View file

@ -96,6 +96,10 @@ void GENX(pan_tex_emit_afrc_payload_entry)(
#if PAN_ARCH >= 9
void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
struct mali_buffer_packed *out);
#elif PAN_ARCH >= 6
void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
struct mali_attribute_buffer_packed *out_buf,
struct mali_attribute_packed *out_attrib);
#else
void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview,
struct mali_texture_packed *out,

View file

@ -21,18 +21,12 @@
struct panvk_buffer_view {
struct vk_buffer_view vk;
#if PAN_ARCH < 9
struct panvk_priv_mem mem;
#endif
struct {
#if PAN_ARCH >= 9
struct mali_buffer_packed buf;
#else
/* TODO: move Bifrost over to using BufferDescriptor as well. */
struct mali_texture_packed tex;
struct mali_attribute_buffer_packed img_attrib_buf[2];
struct mali_attribute_buffer_packed attrib_buf;
struct mali_attribute_packed attrib;
#endif
} descs;
};

View file

@ -44,75 +44,36 @@ panvk_per_arch(CreateBufferView)(VkDevice _device,
enum pipe_format pfmt = vk_format_to_pipe_format(view->vk.format);
uint64_t address = panvk_buffer_gpu_ptr(buffer, pCreateInfo->offset);
VkBufferUsageFlags tex_usage_mask = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
#if PAN_ARCH >= 9
tex_usage_mask |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
#else
/* This alignment constraint only applies when TextureDescriptors are used. */
assert(!(address & 63));
#endif
VkBufferUsageFlags tex_usage_mask =
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
if (buffer->vk.usage & tex_usage_mask) {
#if PAN_ARCH >= 9
struct pan_buffer_view bview = {
.format = pfmt,
.astc.hdr = util_format_is_astc_hdr(pfmt),
.width_el = view->vk.elements,
.base = address,
.base = panvk_buffer_gpu_ptr(buffer, pCreateInfo->offset),
};
#if PAN_ARCH >= 9
GENX(pan_buffer_texture_emit)(&bview, &view->descs.buf);
#else
view->mem =
panvk_pool_alloc_desc(&device->mempools.rw, SURFACE_WITH_STRIDE);
if (!panvk_priv_mem_check_alloc(view->mem))
return panvk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
/* Bifrost requires the base address to be 64 byte aligned and passes the
* remaining offset through the Attribute Descriptor. */
uint64_t aligned_offset = pCreateInfo->offset & ~0x3f;
uint32_t remainder_offset = pCreateInfo->offset & 0x3f;
struct pan_buffer_view bview = {
.format = pfmt,
.width_el = view->vk.elements,
.base = panvk_buffer_gpu_ptr(buffer, aligned_offset),
.offset = remainder_offset,
};
panvk_priv_mem_write(view->mem, 0, struct mali_surface_with_stride_packed, sd) {
struct pan_ptr ptr = {
.gpu = panvk_priv_mem_dev_addr(view->mem),
.cpu = sd,
};
GENX(pan_buffer_texture_emit)(&bview, &view->descs.tex, &ptr);
}
GENX(pan_buffer_texture_emit)(&bview, &view->descs.attrib_buf,
&view->descs.attrib);
#endif
}
#if PAN_ARCH < 9
if (buffer->vk.usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) {
unsigned blksz = vk_format_get_blocksize(pCreateInfo->format);
pan_pack(&view->descs.img_attrib_buf[0], ATTRIBUTE_BUFFER, cfg) {
/* The format is the only thing we lack to emit attribute descriptors
* when copying from the set to the attribute tables. Instead of
* making the descriptor size to store an extra format, we pack
* the 22-bit format with the texel stride, which is expected to be
* fit in remaining 10 bits.
*/
uint32_t hw_fmt = GENX(pan_format_from_pipe_format)(pfmt)->hw;
assert(blksz < BITFIELD_MASK(10));
assert(hw_fmt < BITFIELD_MASK(22));
cfg.type = MALI_ATTRIBUTE_TYPE_3D_LINEAR;
cfg.pointer = address;
cfg.stride = blksz | (hw_fmt << 10);
cfg.size = view->vk.elements * blksz;
}
struct mali_attribute_buffer_packed *buf = &view->descs.img_attrib_buf[1];
pan_cast_and_pack(buf, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) {
cfg.s_dimension = view->vk.elements;
cfg.t_dimension = 1;
cfg.r_dimension = 1;
cfg.row_stride = view->vk.elements * blksz;
}
}
#endif
*pView = panvk_buffer_view_to_handle(view);
return VK_SUCCESS;
}
@ -127,9 +88,5 @@ panvk_per_arch(DestroyBufferView)(VkDevice _device, VkBufferView bufferView,
if (!view)
return;
#if PAN_ARCH < 9
panvk_pool_free_mem(&view->mem);
#endif
vk_buffer_view_destroy(&device->vk, pAllocator, &view->vk);
}

View file

@ -252,10 +252,16 @@ write_buffer_view_desc(struct panvk_descriptor_set *set,
VK_FROM_HANDLE(panvk_buffer_view, view, bufferView);
#if PAN_ARCH < 9
if (type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER)
write_desc(set, binding, elem, &view->descs.img_attrib_buf, NO_SUBDESC);
else
write_desc(set, binding, elem, &view->descs.tex, NO_SUBDESC);
struct {
struct mali_attribute_buffer_packed attr_buf_desc;
struct mali_attribute_packed attr_desc;
uint32_t pad[2];
} padded_desc = {
.attr_buf_desc = view->descs.attrib_buf,
.attr_desc = view->descs.attrib,
};
write_desc(set, binding, elem, &padded_desc, NO_SUBDESC);
#else
write_desc(set, binding, elem, &view->descs.buf, NO_SUBDESC);
#endif

View file

@ -130,13 +130,13 @@ desc_type_to_table_type(
return sampler_subdesc ? PANVK_BIFROST_DESC_TABLE_SAMPLER
: PANVK_BIFROST_DESC_TABLE_TEXTURE;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
return PANVK_BIFROST_DESC_TABLE_TEXTURE;
case VK_DESCRIPTOR_TYPE_SAMPLER:
return PANVK_BIFROST_DESC_TABLE_SAMPLER;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
return PANVK_BIFROST_DESC_TABLE_IMG;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
@ -598,12 +598,10 @@ load_tex_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim,
b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16, 1, 32, ctx);
loaded_size = nir_idiv(b, size, stride);
#else
nir_def *tex_w = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 1, 16, ctx);
/* S dimension is 16 bits wide. We don't support combining S,T dimensions
* to allow large buffers yet. */
loaded_size = nir_iadd_imm(b, nir_u2u32(b, tex_w), 1);
nir_def *stride_size = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 8, 2, 32, ctx);
loaded_size = nir_idiv(b, nir_channel(b, stride_size, 1),
nir_channel(b, stride_size, 0));
#endif
} else {
nir_def *tex_w_h = load_resource_deref_desc(
@ -644,12 +642,10 @@ load_img_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim,
return load_tex_size(b, deref, dim, is_array, ctx);
if (dim == GLSL_SAMPLER_DIM_BUF) {
nir_def *tex_w = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 1, 16, ctx);
/* S dimension is 16 bits wide. We don't support combining S,T dimensions
* to allow large buffers yet. */
return nir_iadd_imm(b, nir_u2u32(b, tex_w), 1);
nir_def *stride_size = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 8, 2, 32, ctx);
return nir_idiv(b, nir_channel(b, stride_size, 1),
nir_channel(b, stride_size, 0));
} else {
nir_def *tex_sz = load_resource_deref_desc(
b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 3, 16, ctx);

View file

@ -894,7 +894,6 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir,
struct pan_compile_inputs input = *compile_input;
pan_postprocess_nir(nir, input.gpu_id);
pan_nir_lower_texture_late(nir, input.gpu_id);
if (nir->info.stage == MESA_SHADER_VERTEX)
NIR_PASS(_, nir, nir_shader_intrinsics_pass, panvk_lower_load_vs_input,
@ -906,8 +905,11 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir,
/* since valhall, panvk_per_arch(nir_lower_descriptors) separates the
* driver set and the user sets, and does not need pan_nir_lower_image_index
*/
if (PAN_ARCH < 9 && nir->info.stage == MESA_SHADER_VERTEX)
if (PAN_ARCH < 9 && nir->info.stage == MESA_SHADER_VERTEX) {
NIR_PASS(_, nir, pan_nir_lower_image_index, MAX_VS_ATTRIBS);
NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch_index, MAX_VS_ATTRIBS);
}
pan_nir_lower_texture_late(nir, input.gpu_id);
if (noperspective_varyings && nir->info.stage == MESA_SHADER_VERTEX) {
NIR_PASS(_, nir, nir_inline_sysval,