diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 795ce1401d2..9e14641ce94 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -88,8 +88,10 @@ struct panfrost_sampler_view { struct mali_texture_packed bifrost_tex_descriptor; }; #else - /* TODO: move Bifrost over to using BufferDescriptor as well. */ - struct mali_texture_packed bifrost_tex_descriptor; + union { + uint64_t texel_buffer_base_ptr; + struct mali_texture_packed bifrost_tex_descriptor; + }; #endif uint64_t texture_bo; uint64_t texture_size; @@ -1709,6 +1711,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, panfrost_translate_texture_dimension(so->base.target); if (so->base.target == PIPE_BUFFER) { +#if PAN_ARCH >= 9 struct pan_buffer_view bview = { .format = format, .width_el = @@ -1717,11 +1720,21 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, .base = prsrc->plane.base + so->base.u.buf.offset, }; -#if PAN_ARCH >= 9 void *tex = &so->bifrost_buf_descriptor; GENX(pan_buffer_texture_emit)(&bview, tex); - return; +#elif PAN_ARCH >= 6 + /* For Bifrost, we'll generate the Attribute Buffer Descriptor when + * setting up attributes, so only store the base pointer at this point. 
*/ + so->texel_buffer_base_ptr = prsrc->plane.base; #else + struct pan_buffer_view bview = { + .format = format, + .width_el = + MIN2(so->base.u.buf.size / util_format_get_blocksize(format), + pan_get_max_texel_buffer_elements(PAN_ARCH)), + .base = prsrc->plane.base + so->base.u.buf.offset, + }; + const struct util_format_description *desc = util_format_description(format); if (desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { @@ -1730,11 +1743,7 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, bview.astc.hdr = util_format_is_astc_hdr(format); } -#if PAN_ARCH >= 6 - unsigned payload_size = pan_size(SURFACE_WITH_STRIDE); -#else unsigned payload_size = pan_size(TEXTURE) + pan_size(SURFACE_WITH_STRIDE); -#endif struct panfrost_pool *pool = so->pool ?: &ctx->descs; struct pan_ptr payload = @@ -1747,16 +1756,13 @@ panfrost_create_sampler_view_bo(struct panfrost_sampler_view *so, so->state = panfrost_pool_take_ref(pool, payload.gpu); - void *tex = (PAN_ARCH >= 6) ? &so->bifrost_tex_descriptor : payload.cpu; - - if (PAN_ARCH <= 5) { - payload.cpu += pan_size(TEXTURE); - payload.gpu += pan_size(TEXTURE); - } + void *tex = payload.cpu; + payload.cpu += pan_size(TEXTURE); + payload.gpu += pan_size(TEXTURE); GENX(pan_buffer_texture_emit)(&bview, tex, &payload); - return; #endif + return; } unsigned first_level = so->base.u.tex.first_level; @@ -1913,7 +1919,16 @@ panfrost_emit_texture_descriptors(struct panfrost_batch *batch, struct panfrost_resource *rsrc = pan_resource(pview->texture); panfrost_update_sampler_view(view, &ctx->base); + +#if PAN_ARCH < 9 + if (pview->target == PIPE_BUFFER) + /* Texel buffers will be emitted as attributes */ + panfrost_emit_null_texture(&out[i]); + else + out[i] = view->bifrost_tex_descriptor; +#else out[i] = view->bifrost_tex_descriptor; +#endif panfrost_batch_read_rsrc(batch, rsrc, stage); panfrost_batch_add_bo(batch, view->state.bo, stage); @@ -1991,10 +2006,10 @@ panfrost_emit_sampler_descriptors(struct panfrost_batch *batch, 
*/ static void emit_image_attribs(struct panfrost_context *ctx, mesa_shader_stage shader, - struct mali_attribute_packed *attribs, + struct mali_attribute_packed *attribs, uint64_t image_mask, unsigned first_image_buf_index) { - unsigned last_bit = util_last_bit(ctx->image_mask[shader]); + unsigned last_bit = util_last_bit(image_mask); for (unsigned i = 0; i < last_bit; ++i) { enum pipe_format format = ctx->images[shader][i].format; @@ -2008,6 +2023,27 @@ emit_image_attribs(struct panfrost_context *ctx, mesa_shader_stage shader, } } +#if PAN_ARCH >= 6 +static void +emit_texbuf_attribs(struct panfrost_context *ctx, mesa_shader_stage shader, + struct mali_attribute_packed *attribs, + unsigned first_texel_buf_index) +{ + unsigned i; + BITSET_FOREACH_SET(i, ctx->texture_buffer[shader].mask, + PIPE_MAX_SHADER_SAMPLER_VIEWS) { + struct panfrost_sampler_view *view = ctx->sampler_views[shader][i]; + assert(view); + enum pipe_format format = view->base.format; + pan_pack(attribs + i, ATTRIBUTE, cfg) { + cfg.buffer_index = first_texel_buf_index + i; + cfg.offset_enable = false; + cfg.format = GENX(pan_format_from_pipe_format)(format)->hw; + } + } +} +#endif + static enum mali_attribute_type pan_modifier_to_attr_type(uint64_t modifier) { @@ -2023,10 +2059,10 @@ pan_modifier_to_attr_type(uint64_t modifier) static void emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, - struct mali_attribute_buffer_packed *bufs) + struct mali_attribute_buffer_packed *bufs, uint64_t image_mask) { struct panfrost_context *ctx = batch->ctx; - unsigned last_bit = util_last_bit(ctx->image_mask[shader]); + unsigned last_bit = util_last_bit(image_mask); for (unsigned i = 0; i < last_bit; ++i) { struct pipe_image_view *image = &ctx->images[shader][i]; @@ -2065,6 +2101,18 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, panfrost_track_image_access(batch, shader, image); +#if PAN_ARCH >= 6 + if (is_buffer) { /* buffer image: one 1D record, the continuation descriptor is skipped */ + pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, 
cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.pointer = rsrc->plane.base + offset; + cfg.stride = util_format_get_blocksize(image->format); + cfg.size = rsrc->base.width0; + } + continue; + } +#endif + pan_pack(bufs + (i * 2), ATTRIBUTE_BUFFER, cfg) { cfg.type = pan_modifier_to_attr_type(rsrc->image.props.modifier); cfg.pointer = rsrc->plane.base + offset; @@ -2074,6 +2122,7 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, is_buffer ? 0 : image->u.tex.level); } +#if PAN_ARCH <= 5 /* on v6+ buffer images were already emitted above */ if (is_buffer) { pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { @@ -2084,6 +2133,7 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, continue; } +#endif pan_cast_and_pack(&bufs[(i * 2) + 1], ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { @@ -2122,9 +2172,31 @@ emit_image_bufs(struct panfrost_batch *batch, mesa_shader_stage shader, } } +#if PAN_ARCH >= 6 +static void +emit_texbuf_bufs(struct panfrost_context *ctx, mesa_shader_stage shader, + struct mali_attribute_buffer_packed *bufs) +{ + unsigned i; + BITSET_FOREACH_SET(i, ctx->texture_buffer[shader].mask, + PIPE_MAX_SHADER_SAMPLER_VIEWS) { + struct panfrost_sampler_view *view = ctx->sampler_views[shader][i]; + assert(view); + struct pipe_sampler_view *pview = &view->base; + + pan_pack(bufs + i, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.pointer = view->texel_buffer_base_ptr + pview->u.buf.offset; + cfg.stride = util_format_get_blocksize(pview->format); + cfg.size = pview->u.buf.size; + } + } +} +#endif + static uint64_t -panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers, - mesa_shader_stage type) +panfrost_emit_image_texbuf_attribs(struct panfrost_batch *batch, + uint64_t *buffers, mesa_shader_stage type) { struct panfrost_context *ctx = batch->ctx; struct panfrost_compiled_shader *shader = ctx->prog[type]; @@ -2134,9 +2206,23 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers, 
return 0; } - unsigned attr_count = util_last_bit(ctx->image_mask[type]); - /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D */ - unsigned buf_count = (attr_count * 2) + (PAN_ARCH >= 6 ? 1 : 0); +#if PAN_ARCH >= 6 + /* For Bifrost, we can only output images if they are used in the shader to + * ensure the offset for texel buffers is correct. Therefore, we check the + * mask here and ensure we emit attribs if the shader changes. */ + uint64_t image_mask = ctx->image_mask[type] & shader->info.images_used; + unsigned image_count = util_last_bit(image_mask); + unsigned attr_count = + image_count + BITSET_LAST_BIT(ctx->texture_buffer[type].mask); +#else + uint64_t image_mask = ctx->image_mask[type]; + unsigned image_count = util_last_bit(image_mask); + unsigned attr_count = image_count; +#endif + /* Images always need a MALI_ATTRIBUTE_BUFFER_CONTINUATION_3D, so we need to + * use two buffers per image (counted once in attr_count, then once again in + * image_count) */ + unsigned buf_count = attr_count + image_count + (PAN_ARCH >= 6 ? 1 : 0); struct pan_ptr bufs = pan_pool_alloc_desc_array(&batch->pool.base, buf_count, ATTRIBUTE_BUFFER); @@ -2144,13 +2230,17 @@ panfrost_emit_image_attribs(struct panfrost_batch *batch, uint64_t *buffers, struct pan_ptr attribs = pan_pool_alloc_desc_array(&batch->pool.base, attr_count, ATTRIBUTE); - emit_image_attribs(ctx, type, attribs.cpu, 0); - emit_image_bufs(batch, type, bufs.cpu); + emit_image_attribs(ctx, type, attribs.cpu, image_mask, 0); + emit_image_bufs(batch, type, bufs.cpu, image_mask); + +#if PAN_ARCH >= 6 + /* Texel buffers come after the images, which require two buffers per image. 
*/ + unsigned image_buf_offset = image_count * 2; + emit_texbuf_attribs(ctx, type, attribs.cpu + image_count, image_buf_offset); + emit_texbuf_bufs(ctx, type, bufs.cpu + image_buf_offset); /* We need an empty attrib buf to stop the prefetching on Bifrost */ -#if PAN_ARCH >= 6 - struct mali_attribute_buffer_packed *attrib_bufs = bufs.cpu; - + struct mali_attribute_buffer_packed *attrib_bufs = bufs.cpu; pan_pack(&attrib_bufs[buf_count - 1], ATTRIBUTE_BUFFER, cfg) ; #endif @@ -2166,16 +2256,28 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers) struct panfrost_vertex_state *so = ctx->vertex; struct panfrost_compiled_shader *vs = ctx->prog[MESA_SHADER_VERTEX]; bool instanced = ctx->instance_count > 1; - uint32_t image_mask = ctx->image_mask[MESA_SHADER_VERTEX]; + + unsigned nr_texbuf; +#if PAN_ARCH >= 6 + /* For Bifrost, we can only output images if they are used in the shader to + * ensure the offset for texel buffers is correct. Therefore, we check the + * mask here and ensure we emit attribs if the shader changes. */ + uint64_t image_mask = + ctx->image_mask[MESA_SHADER_VERTEX] & vs->info.images_used; + nr_texbuf = BITSET_LAST_BIT(ctx->texture_buffer[MESA_SHADER_VERTEX].mask); +#else + uint64_t image_mask = ctx->image_mask[MESA_SHADER_VERTEX]; + nr_texbuf = 0; +#endif unsigned nr_images = util_last_bit(image_mask); /* Worst case: everything is NPOT, which is only possible if instancing * is enabled. Otherwise single record is gauranteed. - * Also, we allocate more memory than what's needed here if either instancing - * is enabled or images are present, this can be improved. */ - unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1; + * Images always use two buffer descriptors. */ + unsigned attrib_bufs = + instanced ? so->nr_bufs * 2 : ALIGN_POT(so->nr_bufs, 2); unsigned nr_bufs = - ((so->nr_bufs + nr_images) * bufs_per_attrib) + (PAN_ARCH >= 6 ? 1 : 0); + attrib_bufs + (nr_images * 2) + nr_texbuf + (PAN_ARCH >= 6 ? 
1 : 0); unsigned count = vs->info.attribute_count; @@ -2326,12 +2428,20 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, uint64_t *buffers) if (nr_images) { k = ALIGN_POT(k, 2); - emit_image_attribs(ctx, MESA_SHADER_VERTEX, out + so->num_elements, k); - emit_image_bufs(batch, MESA_SHADER_VERTEX, bufs + k); - k += (util_last_bit(ctx->image_mask[MESA_SHADER_VERTEX]) * 2); + emit_image_attribs(ctx, MESA_SHADER_VERTEX, out + so->num_elements, + image_mask, k); + emit_image_bufs(batch, MESA_SHADER_VERTEX, bufs + k, image_mask); + k += (nr_images * 2); } #if PAN_ARCH >= 6 + if (nr_texbuf) { + emit_texbuf_attribs(ctx, MESA_SHADER_VERTEX, + out + so->num_elements + nr_images, k); + emit_texbuf_bufs(ctx, MESA_SHADER_VERTEX, bufs + k); + k += nr_texbuf; + } + /* We need an empty attrib buf to stop the prefetching on Bifrost */ pan_pack(&bufs[k], ATTRIBUTE_BUFFER, cfg) ; @@ -3040,12 +3150,20 @@ panfrost_update_shader_state(struct panfrost_batch *batch, batch->rsd[st] = panfrost_emit_frag_shader_meta(batch); } +#if PAN_ARCH >= 6 + /* Bifrost needs to place texel buffers after the image attributes, so we + * need to emit them if textures or the shader is dirty. */ + unsigned attribs_dirty_mask = + PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_TEXTURE | PAN_DIRTY_STAGE_SHADER; +#else + unsigned attribs_dirty_mask = PAN_DIRTY_STAGE_IMAGE; +#endif /* Vertex shaders need to mix vertex data and image descriptors in the * attribute array. This is taken care of in panfrost_update_state_3d(). 
*/ - if (st != MESA_SHADER_VERTEX && (dirty & PAN_DIRTY_STAGE_IMAGE)) { + if (st != MESA_SHADER_VERTEX && (dirty & attribs_dirty_mask)) { batch->attribs[st] = - panfrost_emit_image_attribs(batch, &batch->attrib_bufs[st], st); + panfrost_emit_image_texbuf_attribs(batch, &batch->attrib_bufs[st], st); } #endif } @@ -3086,12 +3204,19 @@ panfrost_update_state_3d(struct panfrost_batch *batch) bool attr_offsetted_by_instance_base = vstate->attr_depends_on_base_instance_mask & BITFIELD_MASK(vs->info.attributes_read_count); +#if PAN_ARCH >= 6 + /* Bifrost needs to place texel buffers after the image attributes, so we + * need to emit them if textures or the shader is dirty. */ + unsigned attribs_dirty_mask = + PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_TEXTURE | PAN_DIRTY_STAGE_SHADER; +#else + unsigned attribs_dirty_mask = PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_SHADER; +#endif /* Vertex data, vertex shader and images accessed by the vertex shader have * an impact on the attributes array, we need to re-emit anytime one of these * parameters changes. 
*/ - if ((dirty & PAN_DIRTY_VERTEX) || - (vt_shader_dirty & (PAN_DIRTY_STAGE_IMAGE | PAN_DIRTY_STAGE_SHADER)) || + if ((dirty & PAN_DIRTY_VERTEX) || (vt_shader_dirty & attribs_dirty_mask) || attr_offsetted_by_instance_base) { batch->attribs[MESA_SHADER_VERTEX] = panfrost_emit_vertex_data( batch, &batch->attrib_bufs[MESA_SHADER_VERTEX]); diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index b65d56b1742..62246fc56ee 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -397,12 +397,18 @@ panfrost_set_sampler_views(struct pipe_context *pctx, if (view) new_nr = p + 1; + if (view && view->target == PIPE_BUFFER) + BITSET_SET(ctx->texture_buffer[shader].mask, p); + else + BITSET_CLEAR(ctx->texture_buffer[shader].mask, p); + pipe_sampler_view_reference( (struct pipe_sampler_view **)&ctx->sampler_views[shader][p], view); } for (; i < num_views + unbind_num_trailing_slots; i++) { unsigned p = i + start_slot; + BITSET_CLEAR(ctx->texture_buffer[shader].mask, p); pipe_sampler_view_reference( (struct pipe_sampler_view **)&ctx->sampler_views[shader][p], NULL); } @@ -417,8 +423,14 @@ panfrost_set_sampler_views(struct pipe_context *pctx, * set sampler views */ if (new_nr == 0) { for (i = 0; i < start_slot; ++i) { - if (ctx->sampler_views[shader][i]) + struct pipe_sampler_view *view = + (struct pipe_sampler_view *)ctx->sampler_views[shader][i]; + if (view) new_nr = i + 1; + if (view && view->target == PIPE_BUFFER) + BITSET_SET(ctx->texture_buffer[shader].mask, i); + else + BITSET_CLEAR(ctx->texture_buffer[shader].mask, i); } } diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index e7a558c3690..cd2dc980f5d 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -206,6 +206,9 @@ struct panfrost_context { struct panfrost_sampler_view 
*sampler_views[MESA_SHADER_STAGES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; unsigned sampler_view_count[MESA_SHADER_STAGES]; + struct { + BITSET_DECLARE(mask, PIPE_MAX_SHADER_SAMPLER_VIEWS); + } texture_buffer[MESA_SHADER_STAGES]; struct blitter_context *blitter; diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c index 9b6fc0f34e9..389ce021cb8 100644 --- a/src/gallium/drivers/panfrost/pan_shader.c +++ b/src/gallium/drivers/panfrost/pan_shader.c @@ -559,13 +559,20 @@ panfrost_create_shader_state(struct pipe_context *pctx, so->noperspective_varyings = pan_nir_collect_noperspective_varyings_fs(nir); - /* Vertex shaders get passed images through the vertex attribute descriptor - * array. We need to add an offset to all image intrinsics so they point - * to the right attribute. - */ + unsigned attrib_offset = 0; if (nir->info.stage == MESA_SHADER_VERTEX && dev->arch <= 7) { - NIR_PASS(_, nir, pan_nir_lower_image_index, - util_bitcount64(nir->info.inputs_read)); + /* Vertex shaders get passed images through the vertex attribute + * descriptor array. We need to add an offset to all image intrinsics so + * they point to the right attribute. + */ + attrib_offset += util_bitcount64(nir->info.inputs_read); + NIR_PASS(_, nir, pan_nir_lower_image_index, attrib_offset); + } + if (dev->arch >= 6 && dev->arch <= 7) { + /* Bifrost needs to use attributes to access texel buffers. We place these + * after images, which are also accessed using attributes. 
*/ + attrib_offset += BITSET_LAST_BIT(nir->info.images_used); + NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch_index, attrib_offset); } /* If this shader uses transform feedback, compile the transform diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index a061e479327..28b44bd641b 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -1605,7 +1605,7 @@ static void bi_emit_image_load(bi_builder *b, nir_intrinsic_instr *instr) { enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); - assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) && + assert((dim != GLSL_SAMPLER_DIM_BUF) && "Texel buffers should already have been lowered"); unsigned coord_comps = nir_image_intrinsic_coord_components(instr); bool array = @@ -1657,7 +1657,7 @@ static void bi_emit_lea_image_to(bi_builder *b, bi_index dest, nir_intrinsic_instr *instr) { enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); - assert((b->shader->arch < 9 || dim != GLSL_SAMPLER_DIM_BUF) && + assert((dim != GLSL_SAMPLER_DIM_BUF) && "Texel buffers should already have been lowered"); bool array = nir_intrinsic_image_array(instr) || dim == GLSL_SAMPLER_DIM_CUBE; @@ -4242,6 +4242,10 @@ enum bifrost_tex_dreg { static void bi_emit_texc(bi_builder *b, nir_tex_instr *instr) { + assert((instr->op != nir_texop_txf || + instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) && + "Texel buffers should already have been lowered"); + struct bifrost_texture_operation desc = { .op = bi_tex_op(instr->op), .offset_or_bias_disable = false, /* TODO */ @@ -6600,10 +6604,8 @@ pan_nir_lower_buf_image_access(nir_shader *shader, unsigned arch) void bifrost_lower_texture_late_nir(nir_shader *nir, unsigned gpu_id) { - if (pan_arch(gpu_id) >= 9) { - NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch, pan_arch(gpu_id)); - NIR_PASS(_, nir, pan_nir_lower_buf_image_access, pan_arch(gpu_id)); - } + NIR_PASS(_, nir, 
pan_nir_lower_texel_buffer_fetch, pan_arch(gpu_id)); + NIR_PASS(_, nir, pan_nir_lower_buf_image_access, pan_arch(gpu_id)); } static int diff --git a/src/panfrost/compiler/pan_compiler.c b/src/panfrost/compiler/pan_compiler.c index 7ce62c5b297..509c9aac681 100644 --- a/src/panfrost/compiler/pan_compiler.c +++ b/src/panfrost/compiler/pan_compiler.c @@ -112,7 +112,8 @@ pan_nir_lower_texture_late(nir_shader *nir, unsigned gpu_id) { /* This must be called after any lowering of resource indices * (panfrost_nir_lower_res_indices / panvk_per_arch(nir_lower_descriptors)) - */ + * and lowering of attribute indices (pan_nir_lower_image_index / + * pan_nir_lower_texel_buffer_fetch_index) */ if (pan_arch(gpu_id) >= 6) bifrost_lower_texture_late_nir(nir, gpu_id); } @@ -281,7 +282,11 @@ pan_shader_update_info(struct pan_shader_info *info, nir_shader *s, } info->outputs_written = s->info.outputs_written; + info->images_used = + s->info.images_used[0] | ((uint64_t)s->info.images_used[1]) << 32; info->attribute_count += BITSET_LAST_BIT(s->info.images_used); + if (arch >= 6 && arch < 9) + info->attribute_count += BITSET_LAST_BIT(s->info.texture_buffers); info->writes_global = s->info.writes_memory; info->ubo_count = s->info.num_ubos; diff --git a/src/panfrost/compiler/pan_compiler.h b/src/panfrost/compiler/pan_compiler.h index 1c4c789a7c1..1469c5a6b77 100644 --- a/src/panfrost/compiler/pan_compiler.h +++ b/src/panfrost/compiler/pan_compiler.h @@ -332,6 +332,7 @@ struct pan_shader_info { unsigned attributes_read_count; unsigned attribute_count; unsigned attributes_read; + uint64_t images_used; struct { unsigned input_count; diff --git a/src/panfrost/lib/pan_desc.h b/src/panfrost/lib/pan_desc.h index af067b0cad5..4f09d0759b3 100644 --- a/src/panfrost/lib/pan_desc.h +++ b/src/panfrost/lib/pan_desc.h @@ -42,6 +42,7 @@ struct pan_buffer_view { } astc; unsigned width_el; uint64_t base; + uint32_t offset; }; struct pan_compute_dim { diff --git a/src/panfrost/lib/pan_texture.c 
b/src/panfrost/lib/pan_texture.c index a3a5f850f58..62f7fd9e6dc 100644 --- a/src/panfrost/lib/pan_texture.c +++ b/src/panfrost/lib/pan_texture.c @@ -1350,6 +1350,30 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, } } +#elif PAN_ARCH >= 6 + +void +GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, + struct mali_attribute_buffer_packed *out_buf, + struct mali_attribute_packed *out_attrib) +{ + unsigned stride = util_format_get_blocksize(bview->format); + uint32_t hw_fmt = GENX(pan_format_from_pipe_format)(bview->format)->hw; + + pan_pack(out_buf, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.pointer = bview->base; + cfg.stride = stride; + cfg.size = bview->width_el * stride; + } + + pan_pack(out_attrib, ATTRIBUTE, cfg) { + cfg.format = hw_fmt; + cfg.offset = bview->offset; + cfg.offset_enable = bview->offset != 0; + } +} + #else void @@ -1377,12 +1401,6 @@ GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, cfg.texel_ordering = MALI_TEXTURE_LAYOUT_LINEAR; cfg.levels = 1; cfg.array_size = 1; - -#if PAN_ARCH >= 6 - cfg.surfaces = payload->gpu; - cfg.minimum_lod = 0; - cfg.maximum_lod = 0; -#endif } } diff --git a/src/panfrost/lib/pan_texture.h b/src/panfrost/lib/pan_texture.h index 6c0fb6291b6..9e5b2e10f58 100644 --- a/src/panfrost/lib/pan_texture.h +++ b/src/panfrost/lib/pan_texture.h @@ -96,6 +96,10 @@ void GENX(pan_tex_emit_afrc_payload_entry)( #if PAN_ARCH >= 9 void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, struct mali_buffer_packed *out); +#elif PAN_ARCH >= 6 +void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, + struct mali_attribute_buffer_packed *out_buf, + struct mali_attribute_packed *out_attrib); #else void GENX(pan_buffer_texture_emit)(const struct pan_buffer_view *bview, struct mali_texture_packed *out, diff --git a/src/panfrost/vulkan/panvk_buffer_view.h b/src/panfrost/vulkan/panvk_buffer_view.h index 3bce7d4d865..4b456f92caf 100644 --- 
a/src/panfrost/vulkan/panvk_buffer_view.h +++ b/src/panfrost/vulkan/panvk_buffer_view.h @@ -21,18 +21,12 @@ struct panvk_buffer_view { struct vk_buffer_view vk; -#if PAN_ARCH < 9 - struct panvk_priv_mem mem; -#endif - struct { #if PAN_ARCH >= 9 struct mali_buffer_packed buf; #else - /* TODO: move Bifrost over to using BufferDescriptor as well. */ - struct mali_texture_packed tex; - - struct mali_attribute_buffer_packed img_attrib_buf[2]; + struct mali_attribute_buffer_packed attrib_buf; + struct mali_attribute_packed attrib; #endif } descs; }; diff --git a/src/panfrost/vulkan/panvk_vX_buffer_view.c b/src/panfrost/vulkan/panvk_vX_buffer_view.c index fa78e261198..64b13d0c138 100644 --- a/src/panfrost/vulkan/panvk_vX_buffer_view.c +++ b/src/panfrost/vulkan/panvk_vX_buffer_view.c @@ -44,75 +44,36 @@ panvk_per_arch(CreateBufferView)(VkDevice _device, enum pipe_format pfmt = vk_format_to_pipe_format(view->vk.format); - uint64_t address = panvk_buffer_gpu_ptr(buffer, pCreateInfo->offset); - VkBufferUsageFlags tex_usage_mask = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; - -#if PAN_ARCH >= 9 - tex_usage_mask |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; -#else - /* This alignment constraint only applies when TextureDescriptors are used. 
*/ - assert(!(address & 63)); -#endif + VkBufferUsageFlags tex_usage_mask = + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; if (buffer->vk.usage & tex_usage_mask) { +#if PAN_ARCH >= 9 struct pan_buffer_view bview = { .format = pfmt, - .astc.hdr = util_format_is_astc_hdr(pfmt), .width_el = view->vk.elements, - .base = address, + .base = panvk_buffer_gpu_ptr(buffer, pCreateInfo->offset), }; -#if PAN_ARCH >= 9 GENX(pan_buffer_texture_emit)(&bview, &view->descs.buf); #else - view->mem = - panvk_pool_alloc_desc(&device->mempools.rw, SURFACE_WITH_STRIDE); - if (!panvk_priv_mem_check_alloc(view->mem)) - return panvk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + /* Bifrost requires the base address to be 64-byte aligned and passes the + * remaining offset through the Attribute Descriptor. */ + uint64_t aligned_offset = pCreateInfo->offset & ~0x3f; + uint32_t remainder_offset = pCreateInfo->offset & 0x3f; + struct pan_buffer_view bview = { + .format = pfmt, + .width_el = view->vk.elements, + .base = panvk_buffer_gpu_ptr(buffer, aligned_offset), + .offset = remainder_offset, + }; - panvk_priv_mem_write(view->mem, 0, struct mali_surface_with_stride_packed, sd) { - struct pan_ptr ptr = { - .gpu = panvk_priv_mem_dev_addr(view->mem), - .cpu = sd, - }; - - GENX(pan_buffer_texture_emit)(&bview, &view->descs.tex, &ptr); - } + GENX(pan_buffer_texture_emit)(&bview, &view->descs.attrib_buf, + &view->descs.attrib); #endif } -#if PAN_ARCH < 9 - if (buffer->vk.usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) { - unsigned blksz = vk_format_get_blocksize(pCreateInfo->format); - - pan_pack(&view->descs.img_attrib_buf[0], ATTRIBUTE_BUFFER, cfg) { - /* The format is the only thing we lack to emit attribute descriptors - * when copying from the set to the attribute tables. Instead of - * making the descriptor size to store an extra format, we pack - * the 22-bit format with the texel stride, which is expected to be - * fit in remaining 10 bits. 
- */ - uint32_t hw_fmt = GENX(pan_format_from_pipe_format)(pfmt)->hw; - - assert(blksz < BITFIELD_MASK(10)); - assert(hw_fmt < BITFIELD_MASK(22)); - - cfg.type = MALI_ATTRIBUTE_TYPE_3D_LINEAR; - cfg.pointer = address; - cfg.stride = blksz | (hw_fmt << 10); - cfg.size = view->vk.elements * blksz; - } - - struct mali_attribute_buffer_packed *buf = &view->descs.img_attrib_buf[1]; - pan_cast_and_pack(buf, ATTRIBUTE_BUFFER_CONTINUATION_3D, cfg) { - cfg.s_dimension = view->vk.elements; - cfg.t_dimension = 1; - cfg.r_dimension = 1; - cfg.row_stride = view->vk.elements * blksz; - } - } -#endif - *pView = panvk_buffer_view_to_handle(view); return VK_SUCCESS; } @@ -127,9 +88,5 @@ panvk_per_arch(DestroyBufferView)(VkDevice _device, VkBufferView bufferView, if (!view) return; -#if PAN_ARCH < 9 - panvk_pool_free_mem(&view->mem); -#endif - vk_buffer_view_destroy(&device->vk, pAllocator, &view->vk); } diff --git a/src/panfrost/vulkan/panvk_vX_descriptor_set.c b/src/panfrost/vulkan/panvk_vX_descriptor_set.c index 37b0b812603..2905692fcc3 100644 --- a/src/panfrost/vulkan/panvk_vX_descriptor_set.c +++ b/src/panfrost/vulkan/panvk_vX_descriptor_set.c @@ -252,10 +252,16 @@ write_buffer_view_desc(struct panvk_descriptor_set *set, VK_FROM_HANDLE(panvk_buffer_view, view, bufferView); #if PAN_ARCH < 9 - if (type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER) - write_desc(set, binding, elem, &view->descs.img_attrib_buf, NO_SUBDESC); - else - write_desc(set, binding, elem, &view->descs.tex, NO_SUBDESC); + struct { + struct mali_attribute_buffer_packed attr_buf_desc; + struct mali_attribute_packed attr_desc; + uint32_t pad[2]; + } padded_desc = { + .attr_buf_desc = view->descs.attrib_buf, + .attr_desc = view->descs.attrib, + }; + + write_desc(set, binding, elem, &padded_desc, NO_SUBDESC); #else write_desc(set, binding, elem, &view->descs.buf, NO_SUBDESC); #endif diff --git a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c index 
78cdab02282..d7044fbdb07 100644 --- a/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c +++ b/src/panfrost/vulkan/panvk_vX_nir_lower_descriptors.c @@ -130,13 +130,13 @@ desc_type_to_table_type( return sampler_subdesc ? PANVK_BIFROST_DESC_TABLE_SAMPLER : PANVK_BIFROST_DESC_TABLE_TEXTURE; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: return PANVK_BIFROST_DESC_TABLE_TEXTURE; case VK_DESCRIPTOR_TYPE_SAMPLER: return PANVK_BIFROST_DESC_TABLE_SAMPLER; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: return PANVK_BIFROST_DESC_TABLE_IMG; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: @@ -598,12 +598,10 @@ load_tex_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim, b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 16, 1, 32, ctx); loaded_size = nir_idiv(b, size, stride); #else - nir_def *tex_w = load_resource_deref_desc( - b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4, 1, 16, ctx); - - /* S dimension is 16 bits wide. We don't support combining S,T dimensions - * to allow large buffers yet. */ - loaded_size = nir_iadd_imm(b, nir_u2u32(b, tex_w), 1); + nir_def *stride_size = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 8, 2, 32, ctx); + loaded_size = nir_idiv(b, nir_channel(b, stride_size, 1), + nir_channel(b, stride_size, 0)); #endif } else { nir_def *tex_w_h = load_resource_deref_desc( @@ -644,12 +642,10 @@ load_img_size(nir_builder *b, nir_deref_instr *deref, enum glsl_sampler_dim dim, return load_tex_size(b, deref, dim, is_array, ctx); if (dim == GLSL_SAMPLER_DIM_BUF) { - nir_def *tex_w = load_resource_deref_desc( - b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 1, 16, ctx); - - /* S dimension is 16 bits wide. We don't support combining S,T dimensions - * to allow large buffers yet. 
*/ - return nir_iadd_imm(b, nir_u2u32(b, tex_w), 1); + nir_def *stride_size = load_resource_deref_desc( + b, deref, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 8, 2, 32, ctx); + return nir_idiv(b, nir_channel(b, stride_size, 1), + nir_channel(b, stride_size, 0)); } else { nir_def *tex_sz = load_resource_deref_desc( b, deref, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 18, 3, 16, ctx); diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 67b42759086..b8b4522e4e5 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -894,7 +894,6 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir, struct pan_compile_inputs input = *compile_input; pan_postprocess_nir(nir, input.gpu_id); - pan_nir_lower_texture_late(nir, input.gpu_id); if (nir->info.stage == MESA_SHADER_VERTEX) NIR_PASS(_, nir, nir_shader_intrinsics_pass, panvk_lower_load_vs_input, @@ -906,8 +905,11 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir, /* since valhall, panvk_per_arch(nir_lower_descriptors) separates the * driver set and the user sets, and does not need pan_nir_lower_image_index */ - if (PAN_ARCH < 9 && nir->info.stage == MESA_SHADER_VERTEX) + if (PAN_ARCH < 9 && nir->info.stage == MESA_SHADER_VERTEX) { NIR_PASS(_, nir, pan_nir_lower_image_index, MAX_VS_ATTRIBS); + NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch_index, MAX_VS_ATTRIBS); + } + pan_nir_lower_texture_late(nir, input.gpu_id); if (noperspective_varyings && nir->info.stage == MESA_SHADER_VERTEX) { NIR_PASS(_, nir, nir_inline_sysval,