diff --git a/src/panfrost/vulkan/bifrost/panvk_pipeline.h b/src/panfrost/vulkan/bifrost/panvk_pipeline.h index 0eebb0b0dc0..551dfb3cdbe 100644 --- a/src/panfrost/vulkan/bifrost/panvk_pipeline.h +++ b/src/panfrost/vulkan/bifrost/panvk_pipeline.h @@ -29,11 +29,6 @@ struct panvk_pipeline_shader { struct panvk_shader *base; - struct { - struct panvk_priv_mem attribs; - unsigned buf_strides[PANVK_VARY_BUF_MAX]; - } varyings; - struct pan_shader_info info; }; @@ -57,6 +52,7 @@ struct panvk_graphics_pipeline { struct panvk_pipeline_shader vs; struct panvk_pipeline_shader fs; + struct panvk_shader_link link; struct { struct vk_dynamic_graphics_state dynamic; diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c b/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c index 81b5c78d2ad..7a1996aa947 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c @@ -89,203 +89,6 @@ cleanup_pipeline_shader(struct panvk_pipeline *pipeline, panvk_per_arch(shader_destroy)(dev, pshader->base, alloc); } -static mali_pixel_format -get_varying_format(gl_shader_stage stage, gl_varying_slot loc, - enum pipe_format pfmt) -{ - switch (loc) { - case VARYING_SLOT_PNTC: - case VARYING_SLOT_PSIZ: -#if PAN_ARCH <= 6 - return (MALI_R16F << 12) | panfrost_get_default_swizzle(1); -#else - return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000; -#endif - case VARYING_SLOT_POS: -#if PAN_ARCH <= 6 - return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4); -#else - return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA; -#endif - default: - assert(pfmt != PIPE_FORMAT_NONE); - return GENX(panfrost_format_from_pipe_format)(pfmt)->hw; - } -} - -struct varyings_info { - enum pipe_format fmts[VARYING_SLOT_MAX]; - BITSET_DECLARE(active, VARYING_SLOT_MAX); -}; - -static void -collect_varyings_info(const struct pan_shader_varying *varyings, - unsigned varying_count, struct varyings_info *info) -{ - for (unsigned i = 0; i < varying_count; i++) { 
- gl_varying_slot loc = varyings[i].location; - - if (varyings[i].format == PIPE_FORMAT_NONE) - continue; - - info->fmts[loc] = varyings[i].format; - BITSET_SET(info->active, loc); - } -} - -static inline enum panvk_varying_buf_id -varying_buf_id(gl_varying_slot loc) -{ - switch (loc) { - case VARYING_SLOT_POS: - return PANVK_VARY_BUF_POSITION; - case VARYING_SLOT_PSIZ: - return PANVK_VARY_BUF_PSIZ; - default: - return PANVK_VARY_BUF_GENERAL; - } -} - -static mali_pixel_format -varying_format(gl_varying_slot loc, enum pipe_format pfmt) -{ - switch (loc) { - case VARYING_SLOT_PNTC: - case VARYING_SLOT_PSIZ: -#if PAN_ARCH <= 6 - return (MALI_R16F << 12) | panfrost_get_default_swizzle(1); -#else - return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000; -#endif - case VARYING_SLOT_POS: -#if PAN_ARCH <= 6 - return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4); -#else - return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA; -#endif - default: - return GENX(panfrost_format_from_pipe_format)(pfmt)->hw; - } -} - -static struct panvk_priv_mem -emit_varying_attrs(struct panvk_pool *desc_pool, - const struct pan_shader_varying *varyings, - unsigned varying_count, const struct varyings_info *info, - unsigned *buf_offsets) -{ - unsigned attr_count = BITSET_COUNT(info->active); - struct panvk_priv_mem mem = - panvk_pool_alloc_desc_array(desc_pool, attr_count, ATTRIBUTE); - struct mali_attribute_packed *attrs = panvk_priv_mem_host_addr(mem); - unsigned attr_idx = 0; - - for (unsigned i = 0; i < varying_count; i++) { - pan_pack(&attrs[attr_idx++], ATTRIBUTE, cfg) { - gl_varying_slot loc = varyings[i].location; - enum pipe_format pfmt = varyings[i].format != PIPE_FORMAT_NONE - ? 
info->fmts[loc] - : PIPE_FORMAT_NONE; - - if (pfmt == PIPE_FORMAT_NONE) { -#if PAN_ARCH >= 7 - cfg.format = (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000; -#else - cfg.format = (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0); -#endif - } else { - cfg.buffer_index = varying_buf_id(loc); - cfg.offset = buf_offsets[loc]; - cfg.format = varying_format(loc, info->fmts[loc]); - } - cfg.offset_enable = false; - } - } - - return mem; -} - -static void -link_shaders(struct panvk_graphics_pipeline *pipeline, - struct panvk_pipeline_shader *stage, - struct panvk_pipeline_shader *next_stage) -{ - struct panvk_device *dev = to_panvk_device(pipeline->base.base.device); - BITSET_DECLARE(active_attrs, VARYING_SLOT_MAX) = {0}; - unsigned buf_strides[PANVK_VARY_BUF_MAX] = {0}; - unsigned buf_offsets[VARYING_SLOT_MAX] = {0}; - struct varyings_info out_vars = {0}; - struct varyings_info in_vars = {0}; - unsigned loc; - - collect_varyings_info(stage->info.varyings.output, - stage->info.varyings.output_count, &out_vars); - collect_varyings_info(next_stage->info.varyings.input, - next_stage->info.varyings.input_count, &in_vars); - - BITSET_OR(active_attrs, in_vars.active, out_vars.active); - - /* Handle the position and point size buffers explicitly, as they are - * passed through separate buffer pointers to the tiler job. - */ - if (next_stage->info.stage == MESA_SHADER_FRAGMENT) { - if (BITSET_TEST(out_vars.active, VARYING_SLOT_POS)) { - buf_strides[PANVK_VARY_BUF_POSITION] = sizeof(float) * 4; - BITSET_CLEAR(active_attrs, VARYING_SLOT_POS); - } - - if (BITSET_TEST(out_vars.active, VARYING_SLOT_PSIZ)) { - buf_strides[PANVK_VARY_BUF_PSIZ] = sizeof(uint16_t); - BITSET_CLEAR(active_attrs, VARYING_SLOT_PSIZ); - } - } - - BITSET_FOREACH_SET(loc, active_attrs, VARYING_SLOT_MAX) { - /* We expect stage to write to all inputs read by next_stage, and - * next_stage to read all inputs written by stage. 
If that's not the - * case, we keep PIPE_FORMAT_NONE to reflect the fact we should use a - * sink attribute (writes are discarded, reads return zeros). - */ - if (in_vars.fmts[loc] == PIPE_FORMAT_NONE || - out_vars.fmts[loc] == PIPE_FORMAT_NONE) { - in_vars.fmts[loc] = PIPE_FORMAT_NONE; - out_vars.fmts[loc] = PIPE_FORMAT_NONE; - continue; - } - - unsigned out_size = util_format_get_blocksize(out_vars.fmts[loc]); - unsigned buf_idx = varying_buf_id(loc); - - /* Always trust the 'next_stage' input format, so we can: - * - discard components that are never read - * - use float types for interpolated fragment shader inputs - * - use fp16 for floats with mediump - * - make sure components that are not written by 'stage' are set to zero - */ - out_vars.fmts[loc] = in_vars.fmts[loc]; - - /* Special buffers are handled explicitly before this loop, everything - * else should be laid out in the general varying buffer. - */ - assert(buf_idx == PANVK_VARY_BUF_GENERAL); - - /* Keep things aligned a 32-bit component. 
*/ - buf_offsets[loc] = buf_strides[buf_idx]; - buf_strides[buf_idx] += ALIGN_POT(out_size, 4); - } - - stage->varyings.attribs = emit_varying_attrs( - &dev->mempools.rw, stage->info.varyings.output, - stage->info.varyings.output_count, &out_vars, buf_offsets); - next_stage->varyings.attribs = emit_varying_attrs( - &dev->mempools.rw, next_stage->info.varyings.input, - next_stage->info.varyings.input_count, &in_vars, buf_offsets); - memcpy(stage->varyings.buf_strides, buf_strides, - sizeof(stage->varyings.buf_strides)); - memcpy(next_stage->varyings.buf_strides, buf_strides, - sizeof(next_stage->varyings.buf_strides)); -} - static VkResult panvk_graphics_pipeline_create(struct panvk_device *dev, struct vk_pipeline_cache *cache, @@ -344,7 +147,8 @@ panvk_graphics_pipeline_create(struct panvk_device *dev, return result; } - link_shaders(gfx_pipeline, &gfx_pipeline->vs, &gfx_pipeline->fs); + panvk_per_arch(link_shaders)(&dev->mempools.rw, gfx_pipeline->vs.base, + gfx_pipeline->fs.base, &gfx_pipeline->link); return VK_SUCCESS; } @@ -445,6 +249,7 @@ panvk_per_arch(DestroyPipeline)(VkDevice _device, VkPipeline _pipeline, struct panvk_graphics_pipeline *gfx_pipeline = panvk_pipeline_to_graphics_pipeline(pipeline); + panvk_shader_link_cleanup(&device->mempools.rw, &gfx_pipeline->link); cleanup_pipeline_shader(pipeline, &gfx_pipeline->vs, pAllocator); cleanup_pipeline_shader(pipeline, &gfx_pipeline->fs, pAllocator); } else { diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index 0c4702aed41..6563ec6e6cb 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -511,7 +511,7 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, mali_ptr psiz_buf = 0; for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { - unsigned buf_size = vertex_count * pipeline->vs.varyings.buf_strides[i]; + unsigned buf_size = vertex_count * pipeline->link.buf_strides[i]; mali_ptr buf_addr = 
buf_size ? pan_pool_alloc_aligned(&cmdbuf->varying_pool.base, buf_size, 64) @@ -519,7 +519,7 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, : 0; pan_pack(&buf_descs[i], ATTRIBUTE_BUFFER, cfg) { - cfg.stride = pipeline->vs.varyings.buf_strides[i]; + cfg.stride = pipeline->link.buf_strides[i]; cfg.size = buf_size; cfg.pointer = buf_addr; } @@ -544,8 +544,8 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, draw->line_width = 1.0f; draw->varying_bufs = bufs.gpu; - draw->vs.varyings = panvk_priv_mem_dev_addr(pipeline->vs.varyings.attribs); - draw->fs.varyings = panvk_priv_mem_dev_addr(pipeline->fs.varyings.attribs); + draw->vs.varyings = panvk_priv_mem_dev_addr(pipeline->link.vs.attribs); + draw->fs.varyings = panvk_priv_mem_dev_addr(pipeline->link.fs.attribs); } static void diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h index 67f53c082c0..87de2a67fd3 100644 --- a/src/panfrost/vulkan/panvk_shader.h +++ b/src/panfrost/vulkan/panvk_shader.h @@ -126,6 +126,13 @@ struct panvk_shader { struct panvk_priv_mem rsd; }; +struct panvk_shader_link { + struct { + struct panvk_priv_mem attribs; + } vs, fs; + unsigned buf_strides[PANVK_VARY_BUF_MAX]; +}; + static inline mali_ptr panvk_shader_get_dev_addr(const struct panvk_shader *shader) { @@ -140,6 +147,19 @@ void panvk_per_arch(shader_destroy)(struct panvk_device *dev, struct panvk_shader *shader, const VkAllocationCallbacks *alloc); +void panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool, + struct panvk_shader *vs, + struct panvk_shader *fs, + struct panvk_shader_link *link); + +static inline void +panvk_shader_link_cleanup(struct panvk_pool *desc_pool, + struct panvk_shader_link *link) +{ + panvk_pool_free_mem(desc_pool, link->vs.attribs); + panvk_pool_free_mem(desc_pool, link->fs.attribs); +} + bool panvk_per_arch(nir_lower_descriptors)( nir_shader *nir, struct panvk_device *dev, const struct vk_pipeline_layout *layout, struct panvk_shader *shader); 
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 93b25095682..c6a8bf0078c 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -413,3 +413,204 @@ panvk_per_arch(shader_destroy)(struct panvk_device *dev, free((void *)shader->bin_ptr); vk_free2(&dev->vk.alloc, alloc, shader); } + +static mali_pixel_format +get_varying_format(gl_shader_stage stage, gl_varying_slot loc, + enum pipe_format pfmt) +{ + switch (loc) { + case VARYING_SLOT_PNTC: + case VARYING_SLOT_PSIZ: +#if PAN_ARCH <= 6 + return (MALI_R16F << 12) | panfrost_get_default_swizzle(1); +#else + return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000; +#endif + case VARYING_SLOT_POS: +#if PAN_ARCH <= 6 + return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4); +#else + return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA; +#endif + default: + assert(pfmt != PIPE_FORMAT_NONE); + return GENX(panfrost_format_from_pipe_format)(pfmt)->hw; + } +} + +struct varyings_info { + enum pipe_format fmts[VARYING_SLOT_MAX]; + BITSET_DECLARE(active, VARYING_SLOT_MAX); +}; + +static void +collect_varyings_info(const struct pan_shader_varying *varyings, + unsigned varying_count, struct varyings_info *info) +{ + for (unsigned i = 0; i < varying_count; i++) { + gl_varying_slot loc = varyings[i].location; + + if (varyings[i].format == PIPE_FORMAT_NONE) + continue; + + info->fmts[loc] = varyings[i].format; + BITSET_SET(info->active, loc); + } +} + +static inline enum panvk_varying_buf_id +varying_buf_id(gl_varying_slot loc) +{ + switch (loc) { + case VARYING_SLOT_POS: + return PANVK_VARY_BUF_POSITION; + case VARYING_SLOT_PSIZ: + return PANVK_VARY_BUF_PSIZ; + default: + return PANVK_VARY_BUF_GENERAL; + } +} + +static mali_pixel_format +varying_format(gl_varying_slot loc, enum pipe_format pfmt) +{ + switch (loc) { + case VARYING_SLOT_PNTC: + case VARYING_SLOT_PSIZ: +#if PAN_ARCH <= 6 + return (MALI_R16F << 12) | 
panfrost_get_default_swizzle(1); +#else + return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000; +#endif + case VARYING_SLOT_POS: +#if PAN_ARCH <= 6 + return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4); +#else + return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA; +#endif + default: + return GENX(panfrost_format_from_pipe_format)(pfmt)->hw; + } +} + +static struct panvk_priv_mem +emit_varying_attrs(struct panvk_pool *desc_pool, + const struct pan_shader_varying *varyings, + unsigned varying_count, const struct varyings_info *info, + unsigned *buf_offsets) +{ + unsigned attr_count = BITSET_COUNT(info->active); + struct panvk_priv_mem mem = + panvk_pool_alloc_desc_array(desc_pool, attr_count, ATTRIBUTE); + struct mali_attribute_packed *attrs = panvk_priv_mem_host_addr(mem); + unsigned attr_idx = 0; + + for (unsigned i = 0; i < varying_count; i++) { + pan_pack(&attrs[attr_idx++], ATTRIBUTE, cfg) { + gl_varying_slot loc = varyings[i].location; + enum pipe_format pfmt = varyings[i].format != PIPE_FORMAT_NONE + ? 
info->fmts[loc] + : PIPE_FORMAT_NONE; + + if (pfmt == PIPE_FORMAT_NONE) { +#if PAN_ARCH >= 7 + cfg.format = (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000; +#else + cfg.format = (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0); +#endif + } else { + cfg.buffer_index = varying_buf_id(loc); + cfg.offset = buf_offsets[loc]; + cfg.format = varying_format(loc, info->fmts[loc]); + } + cfg.offset_enable = false; + } + } + + return mem; +} + +void +panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool, + struct panvk_shader *vs, struct panvk_shader *fs, + struct panvk_shader_link *link) +{ + BITSET_DECLARE(active_attrs, VARYING_SLOT_MAX) = {0}; + unsigned buf_strides[PANVK_VARY_BUF_MAX] = {0}; + unsigned buf_offsets[VARYING_SLOT_MAX] = {0}; + struct varyings_info out_vars = {0}; + struct varyings_info in_vars = {0}; + unsigned loc; + + assert(vs); + assert(vs->info.stage == MESA_SHADER_VERTEX); + + collect_varyings_info(vs->info.varyings.output, + vs->info.varyings.output_count, &out_vars); + + if (fs) { + assert(fs->info.stage == MESA_SHADER_FRAGMENT); + collect_varyings_info(fs->info.varyings.input, + fs->info.varyings.input_count, &in_vars); + } + + BITSET_OR(active_attrs, in_vars.active, out_vars.active); + + /* Handle the position and point size buffers explicitly, as they are + * passed through separate buffer pointers to the tiler job. + */ + if (BITSET_TEST(out_vars.active, VARYING_SLOT_POS)) { + buf_strides[PANVK_VARY_BUF_POSITION] = sizeof(float) * 4; + BITSET_CLEAR(active_attrs, VARYING_SLOT_POS); + } + + if (BITSET_TEST(out_vars.active, VARYING_SLOT_PSIZ)) { + buf_strides[PANVK_VARY_BUF_PSIZ] = sizeof(uint16_t); + BITSET_CLEAR(active_attrs, VARYING_SLOT_PSIZ); + } + + BITSET_FOREACH_SET(loc, active_attrs, VARYING_SLOT_MAX) { + /* We expect the VS to write to all inputs read by the FS, and the + * FS to read all inputs written by the VS. 
If that's not the + * case, we keep PIPE_FORMAT_NONE to reflect the fact we should use a + * sink attribute (writes are discarded, reads return zeros). + */ + if (in_vars.fmts[loc] == PIPE_FORMAT_NONE || + out_vars.fmts[loc] == PIPE_FORMAT_NONE) { + in_vars.fmts[loc] = PIPE_FORMAT_NONE; + out_vars.fmts[loc] = PIPE_FORMAT_NONE; + continue; + } + + unsigned out_size = util_format_get_blocksize(out_vars.fmts[loc]); + unsigned buf_idx = varying_buf_id(loc); + + /* Always trust the FS input format, so we can: + * - discard components that are never read + * - use float types for interpolated fragment shader inputs + * - use fp16 for floats with mediump + * - make sure components that are not written by the VS are set to zero + */ + out_vars.fmts[loc] = in_vars.fmts[loc]; + + /* Special buffers are handled explicitly before this loop, everything + * else should be laid out in the general varying buffer. + */ + assert(buf_idx == PANVK_VARY_BUF_GENERAL); + + /* Keep things aligned on a 32-bit component. */ + buf_offsets[loc] = buf_strides[buf_idx]; + buf_strides[buf_idx] += ALIGN_POT(out_size, 4); + } + + link->vs.attribs = emit_varying_attrs(desc_pool, vs->info.varyings.output, + vs->info.varyings.output_count, + &out_vars, buf_offsets); + + if (fs) + link->fs.attribs = emit_varying_attrs(desc_pool, fs->info.varyings.input, + fs->info.varyings.input_count, + &in_vars, buf_offsets); + + memcpy(link->buf_strides, buf_strides, sizeof(link->buf_strides)); +}