panvk: Move the linking bits to panvk_shader

This is needed to support late linking, which is required for
VK_EXT_shader_object.

We also stop pretending the linking is generic and reflect the fact we
always link vertex with fragment shaders.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29161>
This commit is contained in:
Mary Guillemard 2024-06-14 10:19:01 +02:00 committed by Marge Bot
parent 9c39185e20
commit 05020699b9
5 changed files with 229 additions and 207 deletions

View file

@ -29,11 +29,6 @@
/* Per-stage shader state attached to a pipeline. */
struct panvk_pipeline_shader {
/* Shader object backing this pipeline stage. */
struct panvk_shader *base;
/* Varying linkage state produced when this stage is linked. */
struct {
/* Memory holding this stage's varying attribute descriptors. */
struct panvk_priv_mem attribs;
/* Stride of each varying buffer, indexed by varying buffer ID. */
unsigned buf_strides[PANVK_VARY_BUF_MAX];
} varyings;
/* Shader info for this stage (stage, varyings, ...). */
struct pan_shader_info info;
};
@ -57,6 +52,7 @@ struct panvk_graphics_pipeline {
struct panvk_pipeline_shader vs;
struct panvk_pipeline_shader fs;
struct panvk_shader_link link;
struct {
struct vk_dynamic_graphics_state dynamic;

View file

@ -89,203 +89,6 @@ cleanup_pipeline_shader(struct panvk_pipeline *pipeline,
panvk_per_arch(shader_destroy)(dev, pshader->base, alloc);
}
/**
 * Translate a varying slot and its pipe format into the hardware pixel
 * format used in attribute descriptors.
 *
 * The point-coordinate/point-size and position slots use fixed formats that
 * do not depend on pfmt. Every other slot uses the hardware format matching
 * pfmt, which must not be PIPE_FORMAT_NONE. The stage argument is not used.
 */
static mali_pixel_format
get_varying_format(gl_shader_stage stage, gl_varying_slot loc,
                   enum pipe_format pfmt)
{
   if (loc == VARYING_SLOT_PNTC || loc == VARYING_SLOT_PSIZ) {
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   }

   if (loc == VARYING_SLOT_POS) {
#if PAN_ARCH <= 6
      return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   }

   /* Generic varyings need a real format to be translated. */
   assert(pfmt != PIPE_FORMAT_NONE);
   return GENX(panfrost_format_from_pipe_format)(pfmt)->hw;
}
/* Varying formats and usage for one side of a shader interface. */
struct varyings_info {
/* Format of each varying, indexed by varying slot. */
enum pipe_format fmts[VARYING_SLOT_MAX];
/* One bit per varying slot, set for slots that have a format recorded. */
BITSET_DECLARE(active, VARYING_SLOT_MAX);
};
/**
 * Record the format of every varying that has one.
 *
 * For each entry of varyings[] whose format is not PIPE_FORMAT_NONE, the
 * format is stored in info->fmts[] at the varying's location and the
 * matching bit of info->active is set. Entries without a format are ignored.
 */
static void
collect_varyings_info(const struct pan_shader_varying *varyings,
                      unsigned varying_count, struct varyings_info *info)
{
   unsigned idx = 0;

   while (idx < varying_count) {
      const struct pan_shader_varying *var = &varyings[idx++];

      if (var->format != PIPE_FORMAT_NONE) {
         const gl_varying_slot slot = var->location;

         info->fmts[slot] = var->format;
         BITSET_SET(info->active, slot);
      }
   }
}
/**
 * Pick the varying buffer a slot lives in: position and point size each
 * have a dedicated buffer, every other slot goes to the general buffer.
 */
static inline enum panvk_varying_buf_id
varying_buf_id(gl_varying_slot loc)
{
   if (loc == VARYING_SLOT_POS)
      return PANVK_VARY_BUF_POSITION;

   return loc == VARYING_SLOT_PSIZ ? PANVK_VARY_BUF_PSIZ
                                   : PANVK_VARY_BUF_GENERAL;
}
/**
 * Return the hardware pixel format to program in the attribute descriptor
 * of a varying.
 *
 * Position and point-coordinate/point-size slots use fixed formats; any
 * other slot uses the hardware format matching pfmt.
 */
static mali_pixel_format
varying_format(gl_varying_slot loc, enum pipe_format pfmt)
{
   mali_pixel_format hw_fmt;

   if (loc == VARYING_SLOT_POS) {
#if PAN_ARCH <= 6
      hw_fmt = (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      hw_fmt = (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   } else if (loc == VARYING_SLOT_PNTC || loc == VARYING_SLOT_PSIZ) {
#if PAN_ARCH <= 6
      hw_fmt = (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      hw_fmt = (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   } else {
      hw_fmt = GENX(panfrost_format_from_pipe_format)(pfmt)->hw;
   }

   return hw_fmt;
}
/**
 * Allocate and fill the ATTRIBUTE descriptor array describing one side of
 * the varying interface (the outputs of a stage or the inputs of the next).
 *
 * One descriptor is emitted per entry of varyings[], in order. Entries that
 * have no format, or whose slot ended up with PIPE_FORMAT_NONE in info, get
 * a constant-zero "sink" descriptor; the others point into the varying
 * buffer selected by varying_buf_id() at buf_offsets[location].
 *
 * @param desc_pool     pool the descriptor array is allocated from
 * @param varyings      varyings to describe
 * @param varying_count number of entries in varyings[]
 * @param info          linked formats, indexed by varying slot
 * @param buf_offsets   offset of each varying in its buffer, indexed by slot
 * @return the memory holding the descriptor array
 *
 * NOTE(review): the result of the allocation is not checked here.
 */
static struct panvk_priv_mem
emit_varying_attrs(struct panvk_pool *desc_pool,
                   const struct pan_shader_varying *varyings,
                   unsigned varying_count, const struct varyings_info *info,
                   unsigned *buf_offsets)
{
   /* The loop below writes exactly one descriptor per varyings[] entry,
    * sink entries included, so the array must hold varying_count elements.
    * Sizing it on BITSET_COUNT(info->active) under-allocates as soon as one
    * entry has PIPE_FORMAT_NONE, since such entries never set an active bit.
    */
   struct panvk_priv_mem mem =
      panvk_pool_alloc_desc_array(desc_pool, varying_count, ATTRIBUTE);
   struct mali_attribute_packed *attrs = panvk_priv_mem_host_addr(mem);

   for (unsigned i = 0; i < varying_count; i++) {
      /* Index with i directly: no side effect in the macro argument. */
      pan_pack(&attrs[i], ATTRIBUTE, cfg) {
         gl_varying_slot loc = varyings[i].location;
         enum pipe_format pfmt = varyings[i].format != PIPE_FORMAT_NONE
                                    ? info->fmts[loc]
                                    : PIPE_FORMAT_NONE;

         if (pfmt == PIPE_FORMAT_NONE) {
            /* Sink attribute: constant format with an all-zero swizzle. */
#if PAN_ARCH >= 7
            cfg.format = (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
            cfg.format = (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
         } else {
            cfg.buffer_index = varying_buf_id(loc);
            cfg.offset = buf_offsets[loc];
            cfg.format = varying_format(loc, pfmt);
         }

         cfg.offset_enable = false;
      }
   }

   return mem;
}
/*
 * Link two consecutive pipeline stages.
 *
 * Collects the outputs of `stage` and the inputs of `next_stage`, assigns an
 * offset in the general varying buffer to every varying that is both written
 * and read, emits the attribute descriptors of both stages, and stores the
 * resulting per-buffer strides in both stages' `varyings.buf_strides`.
 * Varyings that are only written or only read become sink attributes.
 */
static void
link_shaders(struct panvk_graphics_pipeline *pipeline,
struct panvk_pipeline_shader *stage,
struct panvk_pipeline_shader *next_stage)
{
struct panvk_device *dev = to_panvk_device(pipeline->base.base.device);
BITSET_DECLARE(active_attrs, VARYING_SLOT_MAX) = {0};
unsigned buf_strides[PANVK_VARY_BUF_MAX] = {0};
unsigned buf_offsets[VARYING_SLOT_MAX] = {0};
struct varyings_info out_vars = {0};
struct varyings_info in_vars = {0};
unsigned loc;
collect_varyings_info(stage->info.varyings.output,
stage->info.varyings.output_count, &out_vars);
collect_varyings_info(next_stage->info.varyings.input,
next_stage->info.varyings.input_count, &in_vars);
/* Slots to lay out: anything written by `stage` or read by `next_stage`. */
BITSET_OR(active_attrs, in_vars.active, out_vars.active);
/* Handle the position and point size buffers explicitly, as they are
 * passed through separate buffer pointers to the tiler job.
 */
if (next_stage->info.stage == MESA_SHADER_FRAGMENT) {
if (BITSET_TEST(out_vars.active, VARYING_SLOT_POS)) {
buf_strides[PANVK_VARY_BUF_POSITION] = sizeof(float) * 4;
BITSET_CLEAR(active_attrs, VARYING_SLOT_POS);
}
if (BITSET_TEST(out_vars.active, VARYING_SLOT_PSIZ)) {
buf_strides[PANVK_VARY_BUF_PSIZ] = sizeof(uint16_t);
BITSET_CLEAR(active_attrs, VARYING_SLOT_PSIZ);
}
}
BITSET_FOREACH_SET(loc, active_attrs, VARYING_SLOT_MAX) {
/* We expect stage to write to all inputs read by next_stage, and
 * next_stage to read all inputs written by stage. If that's not the
 * case, we keep PIPE_FORMAT_NONE to reflect the fact we should use a
 * sink attribute (writes are discarded, reads return zeros).
 */
if (in_vars.fmts[loc] == PIPE_FORMAT_NONE ||
out_vars.fmts[loc] == PIPE_FORMAT_NONE) {
in_vars.fmts[loc] = PIPE_FORMAT_NONE;
out_vars.fmts[loc] = PIPE_FORMAT_NONE;
continue;
}
/* NOTE(review): the size is taken from the output format before that
 * format is replaced by the input format below - confirm this is the
 * intended size when the two formats differ.
 */
unsigned out_size = util_format_get_blocksize(out_vars.fmts[loc]);
unsigned buf_idx = varying_buf_id(loc);
/* Always trust the 'next_stage' input format, so we can:
 * - discard components that are never read
 * - use float types for interpolated fragment shader inputs
 * - use fp16 for floats with mediump
 * - make sure components that are not written by 'stage' are set to zero
 */
out_vars.fmts[loc] = in_vars.fmts[loc];
/* Special buffers are handled explicitly before this loop, everything
 * else should be laid out in the general varying buffer.
 */
assert(buf_idx == PANVK_VARY_BUF_GENERAL);
/* Keep things aligned on a 32-bit component. */
buf_offsets[loc] = buf_strides[buf_idx];
buf_strides[buf_idx] += ALIGN_POT(out_size, 4);
}
/* Both stages share the same offsets so reads match writes. */
stage->varyings.attribs = emit_varying_attrs(
&dev->mempools.rw, stage->info.varyings.output,
stage->info.varyings.output_count, &out_vars, buf_offsets);
next_stage->varyings.attribs = emit_varying_attrs(
&dev->mempools.rw, next_stage->info.varyings.input,
next_stage->info.varyings.input_count, &in_vars, buf_offsets);
memcpy(stage->varyings.buf_strides, buf_strides,
sizeof(stage->varyings.buf_strides));
memcpy(next_stage->varyings.buf_strides, buf_strides,
sizeof(next_stage->varyings.buf_strides));
}
static VkResult
panvk_graphics_pipeline_create(struct panvk_device *dev,
struct vk_pipeline_cache *cache,
@ -344,7 +147,8 @@ panvk_graphics_pipeline_create(struct panvk_device *dev,
return result;
}
link_shaders(gfx_pipeline, &gfx_pipeline->vs, &gfx_pipeline->fs);
panvk_per_arch(link_shaders)(&dev->mempools.rw, gfx_pipeline->vs.base,
gfx_pipeline->fs.base, &gfx_pipeline->link);
return VK_SUCCESS;
}
@ -445,6 +249,7 @@ panvk_per_arch(DestroyPipeline)(VkDevice _device, VkPipeline _pipeline,
struct panvk_graphics_pipeline *gfx_pipeline =
panvk_pipeline_to_graphics_pipeline(pipeline);
panvk_shader_link_cleanup(&device->mempools.rw, &gfx_pipeline->link);
cleanup_pipeline_shader(pipeline, &gfx_pipeline->vs, pAllocator);
cleanup_pipeline_shader(pipeline, &gfx_pipeline->fs, pAllocator);
} else {

View file

@ -511,7 +511,7 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
mali_ptr psiz_buf = 0;
for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
unsigned buf_size = vertex_count * pipeline->vs.varyings.buf_strides[i];
unsigned buf_size = vertex_count * pipeline->link.buf_strides[i];
mali_ptr buf_addr =
buf_size
? pan_pool_alloc_aligned(&cmdbuf->varying_pool.base, buf_size, 64)
@ -519,7 +519,7 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
: 0;
pan_pack(&buf_descs[i], ATTRIBUTE_BUFFER, cfg) {
cfg.stride = pipeline->vs.varyings.buf_strides[i];
cfg.stride = pipeline->link.buf_strides[i];
cfg.size = buf_size;
cfg.pointer = buf_addr;
}
@ -544,8 +544,8 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
draw->line_width = 1.0f;
draw->varying_bufs = bufs.gpu;
draw->vs.varyings = panvk_priv_mem_dev_addr(pipeline->vs.varyings.attribs);
draw->fs.varyings = panvk_priv_mem_dev_addr(pipeline->fs.varyings.attribs);
draw->vs.varyings = panvk_priv_mem_dev_addr(pipeline->link.vs.attribs);
draw->fs.varyings = panvk_priv_mem_dev_addr(pipeline->link.fs.attribs);
}
static void

View file

@ -126,6 +126,13 @@ struct panvk_shader {
struct panvk_priv_mem rsd;
};
/* Result of linking a vertex shader with a fragment shader. */
struct panvk_shader_link {
/* Per-stage memory holding the varying attribute descriptors. */
struct {
struct panvk_priv_mem attribs;
} vs, fs;
/* Stride of each varying buffer, indexed by varying buffer ID. */
unsigned buf_strides[PANVK_VARY_BUF_MAX];
};
static inline mali_ptr
panvk_shader_get_dev_addr(const struct panvk_shader *shader)
{
@ -140,6 +147,19 @@ void panvk_per_arch(shader_destroy)(struct panvk_device *dev,
struct panvk_shader *shader,
const VkAllocationCallbacks *alloc);
/*
 * Link a vertex shader with a fragment shader.
 *
 * Fills `link` with the per-stage varying attribute descriptors, allocated
 * from `desc_pool`, and with the varying buffer strides. `vs` is required;
 * `fs` may be NULL.
 */
void panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool,
struct panvk_shader *vs,
struct panvk_shader *fs,
struct panvk_shader_link *link);
/*
 * Release the varying attribute descriptor memory of both stages of a
 * shader link back to `desc_pool`.
 *
 * Both the VS and FS allocations are freed unconditionally.
 * NOTE(review): presumably `desc_pool` must be the pool the descriptors
 * were allocated from - confirm against panvk_pool_free_mem.
 */
static inline void
panvk_shader_link_cleanup(struct panvk_pool *desc_pool,
struct panvk_shader_link *link)
{
panvk_pool_free_mem(desc_pool, link->vs.attribs);
panvk_pool_free_mem(desc_pool, link->fs.attribs);
}
bool panvk_per_arch(nir_lower_descriptors)(
nir_shader *nir, struct panvk_device *dev,
const struct vk_pipeline_layout *layout, struct panvk_shader *shader);

View file

@ -413,3 +413,204 @@ panvk_per_arch(shader_destroy)(struct panvk_device *dev,
free((void *)shader->bin_ptr);
vk_free2(&dev->vk.alloc, alloc, shader);
}
/**
 * Translate a varying slot and its pipe format into the hardware pixel
 * format used in attribute descriptors.
 *
 * The point-coordinate/point-size and position slots use fixed formats that
 * do not depend on pfmt. Every other slot uses the hardware format matching
 * pfmt, which must not be PIPE_FORMAT_NONE. The stage argument is not used.
 */
static mali_pixel_format
get_varying_format(gl_shader_stage stage, gl_varying_slot loc,
                   enum pipe_format pfmt)
{
   if (loc == VARYING_SLOT_PNTC || loc == VARYING_SLOT_PSIZ) {
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   }

   if (loc == VARYING_SLOT_POS) {
#if PAN_ARCH <= 6
      return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   }

   /* Generic varyings need a real format to be translated. */
   assert(pfmt != PIPE_FORMAT_NONE);
   return GENX(panfrost_format_from_pipe_format)(pfmt)->hw;
}
/* Varying formats and usage for one side of a shader interface. */
struct varyings_info {
/* Format of each varying, indexed by varying slot. */
enum pipe_format fmts[VARYING_SLOT_MAX];
/* One bit per varying slot, set for slots that have a format recorded. */
BITSET_DECLARE(active, VARYING_SLOT_MAX);
};
/**
 * Record the format of every varying that has one.
 *
 * For each entry of varyings[] whose format is not PIPE_FORMAT_NONE, the
 * format is stored in info->fmts[] at the varying's location and the
 * matching bit of info->active is set. Entries without a format are ignored.
 */
static void
collect_varyings_info(const struct pan_shader_varying *varyings,
                      unsigned varying_count, struct varyings_info *info)
{
   unsigned idx = 0;

   while (idx < varying_count) {
      const struct pan_shader_varying *var = &varyings[idx++];

      if (var->format != PIPE_FORMAT_NONE) {
         const gl_varying_slot slot = var->location;

         info->fmts[slot] = var->format;
         BITSET_SET(info->active, slot);
      }
   }
}
/**
 * Pick the varying buffer a slot lives in: position and point size each
 * have a dedicated buffer, every other slot goes to the general buffer.
 */
static inline enum panvk_varying_buf_id
varying_buf_id(gl_varying_slot loc)
{
   if (loc == VARYING_SLOT_POS)
      return PANVK_VARY_BUF_POSITION;

   return loc == VARYING_SLOT_PSIZ ? PANVK_VARY_BUF_PSIZ
                                   : PANVK_VARY_BUF_GENERAL;
}
/**
 * Return the hardware pixel format to program in the attribute descriptor
 * of a varying.
 *
 * Position and point-coordinate/point-size slots use fixed formats; any
 * other slot uses the hardware format matching pfmt.
 */
static mali_pixel_format
varying_format(gl_varying_slot loc, enum pipe_format pfmt)
{
   mali_pixel_format hw_fmt;

   if (loc == VARYING_SLOT_POS) {
#if PAN_ARCH <= 6
      hw_fmt = (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      hw_fmt = (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   } else if (loc == VARYING_SLOT_PNTC || loc == VARYING_SLOT_PSIZ) {
#if PAN_ARCH <= 6
      hw_fmt = (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      hw_fmt = (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   } else {
      hw_fmt = GENX(panfrost_format_from_pipe_format)(pfmt)->hw;
   }

   return hw_fmt;
}
/**
 * Allocate and fill the ATTRIBUTE descriptor array describing one side of
 * the varying interface (the VS outputs or the FS inputs).
 *
 * One descriptor is emitted per entry of varyings[], in order. Entries that
 * have no format, or whose slot ended up with PIPE_FORMAT_NONE in info, get
 * a constant-zero "sink" descriptor; the others point into the varying
 * buffer selected by varying_buf_id() at buf_offsets[location].
 *
 * @param desc_pool     pool the descriptor array is allocated from
 * @param varyings      varyings to describe
 * @param varying_count number of entries in varyings[]
 * @param info          linked formats, indexed by varying slot
 * @param buf_offsets   offset of each varying in its buffer, indexed by slot
 * @return the memory holding the descriptor array
 *
 * NOTE(review): the result of the allocation is not checked here.
 */
static struct panvk_priv_mem
emit_varying_attrs(struct panvk_pool *desc_pool,
                   const struct pan_shader_varying *varyings,
                   unsigned varying_count, const struct varyings_info *info,
                   unsigned *buf_offsets)
{
   /* The loop below writes exactly one descriptor per varyings[] entry,
    * sink entries included, so the array must hold varying_count elements.
    * Sizing it on BITSET_COUNT(info->active) under-allocates as soon as one
    * entry has PIPE_FORMAT_NONE, since such entries never set an active bit.
    */
   struct panvk_priv_mem mem =
      panvk_pool_alloc_desc_array(desc_pool, varying_count, ATTRIBUTE);
   struct mali_attribute_packed *attrs = panvk_priv_mem_host_addr(mem);

   for (unsigned i = 0; i < varying_count; i++) {
      /* Index with i directly: no side effect in the macro argument. */
      pan_pack(&attrs[i], ATTRIBUTE, cfg) {
         gl_varying_slot loc = varyings[i].location;
         enum pipe_format pfmt = varyings[i].format != PIPE_FORMAT_NONE
                                    ? info->fmts[loc]
                                    : PIPE_FORMAT_NONE;

         if (pfmt == PIPE_FORMAT_NONE) {
            /* Sink attribute: constant format with an all-zero swizzle. */
#if PAN_ARCH >= 7
            cfg.format = (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
            cfg.format = (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
         } else {
            cfg.buffer_index = varying_buf_id(loc);
            cfg.offset = buf_offsets[loc];
            cfg.format = varying_format(loc, pfmt);
         }

         cfg.offset_enable = false;
      }
   }

   return mem;
}
void
panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool,
struct panvk_shader *vs, struct panvk_shader *fs,
struct panvk_shader_link *link)
{
BITSET_DECLARE(active_attrs, VARYING_SLOT_MAX) = {0};
unsigned buf_strides[PANVK_VARY_BUF_MAX] = {0};
unsigned buf_offsets[VARYING_SLOT_MAX] = {0};
struct varyings_info out_vars = {0};
struct varyings_info in_vars = {0};
unsigned loc;
assert(vs);
assert(vs->info.stage == MESA_SHADER_VERTEX);
collect_varyings_info(vs->info.varyings.output,
vs->info.varyings.output_count, &out_vars);
if (fs) {
assert(fs->info.stage == MESA_SHADER_FRAGMENT);
collect_varyings_info(fs->info.varyings.input,
fs->info.varyings.input_count, &in_vars);
}
BITSET_OR(active_attrs, in_vars.active, out_vars.active);
/* Handle the position and point size buffers explicitly, as they are
* passed through separate buffer pointers to the tiler job.
*/
if (BITSET_TEST(out_vars.active, VARYING_SLOT_POS)) {
buf_strides[PANVK_VARY_BUF_POSITION] = sizeof(float) * 4;
BITSET_CLEAR(active_attrs, VARYING_SLOT_POS);
}
if (BITSET_TEST(out_vars.active, VARYING_SLOT_PSIZ)) {
buf_strides[PANVK_VARY_BUF_PSIZ] = sizeof(uint16_t);
BITSET_CLEAR(active_attrs, VARYING_SLOT_PSIZ);
}
BITSET_FOREACH_SET(loc, active_attrs, VARYING_SLOT_MAX) {
/* We expect the VS to write to all inputs read by the FS, and the
* FS to read all inputs written by the VS. If that's not the
* case, we keep PIPE_FORMAT_NONE to reflect the fact we should use a
* sink attribute (writes are discarded, reads return zeros).
*/
if (in_vars.fmts[loc] == PIPE_FORMAT_NONE ||
out_vars.fmts[loc] == PIPE_FORMAT_NONE) {
in_vars.fmts[loc] = PIPE_FORMAT_NONE;
out_vars.fmts[loc] = PIPE_FORMAT_NONE;
continue;
}
unsigned out_size = util_format_get_blocksize(out_vars.fmts[loc]);
unsigned buf_idx = varying_buf_id(loc);
/* Always trust the VS input format, so we can:
* - discard components that are never read
* - use float types for interpolated fragment shader inputs
* - use fp16 for floats with mediump
* - make sure components that are not written by the FS are set to zero
*/
out_vars.fmts[loc] = in_vars.fmts[loc];
/* Special buffers are handled explicitly before this loop, everything
* else should be laid out in the general varying buffer.
*/
assert(buf_idx == PANVK_VARY_BUF_GENERAL);
/* Keep things aligned a 32-bit component. */
buf_offsets[loc] = buf_strides[buf_idx];
buf_strides[buf_idx] += ALIGN_POT(out_size, 4);
}
link->vs.attribs = emit_varying_attrs(desc_pool, vs->info.varyings.output,
vs->info.varyings.output_count,
&out_vars, buf_offsets);
if (fs)
link->fs.attribs = emit_varying_attrs(desc_pool, fs->info.varyings.input,
fs->info.varyings.input_count,
&in_vars, buf_offsets);
memcpy(link->buf_strides, buf_strides, sizeof(link->buf_strides));
}