From a7de9dae6bd51a272d2462fef844f49e620803dd Mon Sep 17 00:00:00 2001
From: Ella Stanforth
Date: Mon, 15 Sep 2025 12:03:56 +0100
Subject: [PATCH] pvr: Add routine for filling out usc_mrt_setup from dynamic
 rendering state

Signed-off-by: Ella Stanforth
Co-authored-by: Luigi Santivetti
Acked-by: Erik Faye-Lund
Part-of:
---
 src/imagination/vulkan/meson.build   |   1 +
 src/imagination/vulkan/pvr_hw_pass.h |  80 +--------
 src/imagination/vulkan/pvr_mrt.c     | 237 +++++++++++++++++++++++++++
 src/imagination/vulkan/pvr_mrt.h     | 108 ++++++++++++
 4 files changed, 347 insertions(+), 79 deletions(-)
 create mode 100644 src/imagination/vulkan/pvr_mrt.c
 create mode 100644 src/imagination/vulkan/pvr_mrt.h

diff --git a/src/imagination/vulkan/meson.build b/src/imagination/vulkan/meson.build
index 356d59dc048..c16833e66df 100644
--- a/src/imagination/vulkan/meson.build
+++ b/src/imagination/vulkan/meson.build
@@ -43,6 +43,7 @@ pvr_files = files(
   'pvr_job_context.c',
   'pvr_job_render.c',
   'pvr_job_transfer.c',
+  'pvr_mrt.c',
   'pvr_pass.c',
   'pvr_physical_device.c',
   'pvr_pipeline.c',
diff --git a/src/imagination/vulkan/pvr_hw_pass.h b/src/imagination/vulkan/pvr_hw_pass.h
index 2e1a0898dcb..7681abf7316 100644
--- a/src/imagination/vulkan/pvr_hw_pass.h
+++ b/src/imagination/vulkan/pvr_hw_pass.h
@@ -31,12 +31,7 @@
 struct pvr_device;
 struct pvr_render_pass;
 
-/* Specifies the location of render target writes. */
-enum usc_mrt_resource_type {
-   USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid. */
-   USC_MRT_RESOURCE_TYPE_OUTPUT_REG,
-   USC_MRT_RESOURCE_TYPE_MEMORY,
-};
+#include "pvr_mrt.h"
 
 enum pvr_resolve_type {
    PVR_RESOLVE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid. */
@@ -55,79 +50,6 @@ enum pvr_renderpass_hwsetup_input_access {
    PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE,
 };
 
-#define PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS (4)
-
-struct usc_mrt_desc {
-   /* Size (in bytes) of the intermediate storage required for each pixel in the
-    * render target.
-    */
-   uint32_t intermediate_size;
-
-   /* Mask of the bits from each dword which are read by the PBE. */
-   uint32_t valid_mask[PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS];
-
-   /* Higher number = higher priority. Used to decide which render targets get
-    * allocated dedicated output registers.
-    */
-   uint32_t priority;
-};
-
-struct usc_mrt_resource {
-   /* Input description of render target. */
-   struct usc_mrt_desc mrt_desc;
-
-   /* Resource type allocated for render target. */
-   enum usc_mrt_resource_type type;
-
-   /* Intermediate pixel size (in bytes). */
-   uint32_t intermediate_size;
-
-   union {
-      /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
-      struct {
-         /* The output register to use. */
-         uint32_t output_reg;
-
-         /* The offset in bytes into the output register. */
-         uint32_t offset;
-      } reg;
-
-      /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
-      struct {
-         /* The index of the tile buffer to use. */
-         uint32_t tile_buffer;
-
-         /* The offset in dwords within the tile buffer. */
-         uint32_t offset_dw;
-      } mem;
-   };
-};
-
-struct usc_mrt_setup {
-   /* Number of render targets present. */
-   uint32_t num_render_targets;
-
-   /* Number of output registers used per-pixel (1, 2 or 4). */
-   uint32_t num_output_regs;
-
-   /* Number of tile buffers used. */
-   uint32_t num_tile_buffers;
-
-   /* Size of a tile buffer in bytes. */
-   uint32_t tile_buffer_size;
-
-   /* Array of MRT resources allocated for each render target. The number of
-    * elements is determined by usc_mrt_setup::num_render_targets.
-    */
-   struct usc_mrt_resource *mrt_resources;
-
-   /* Don't set up source pos in emit. */
-   bool disable_source_pos_override;
-
-   /* Hash unique to this particular setup. */
-   uint32_t hash;
-};
-
 struct pvr_renderpass_hwsetup_eot_surface {
    /* MRT index to store from. Also used to index into
     * usc_mrt_setup::mrt_resources.
diff --git a/src/imagination/vulkan/pvr_mrt.c b/src/imagination/vulkan/pvr_mrt.c
new file mode 100644
index 00000000000..8f6c25de9fc
--- /dev/null
+++ b/src/imagination/vulkan/pvr_mrt.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright © 2025 Imagination Technologies Ltd.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "pvr_mrt.h"
+
+#include "vk_log.h"
+
+#include "pvr_device.h"
+#include "pvr_formats.h"
+#include "pvr_physical_device.h"
+
+#include "hwdef/rogue_hw_utils.h"
+#include "util/macros.h"
+
+/* Which parts of the output registers/a tile buffer are currently allocated. */
+struct pvr_mrt_alloc_mask {
+   /* Bit array. A bit is set if the corresponding dword is allocated. */
+   BITSET_DECLARE(allocs, 8U);
+};
+
+/* Allocation state carried across the render targets of a single setup. */
+struct pvr_mrt_alloc_ctx {
+   /* Which pixel output registers are allocated. */
+   struct pvr_mrt_alloc_mask output_reg;
+
+   /* Range of allocated output registers. */
+   uint32_t output_regs_count;
+
+   /* Number of tile buffers allocated. */
+   uint32_t tile_buffers_count;
+
+   /* Which parts of each tile buffer are allocated. Length is
+    * tile_buffers_count.
+    */
+   struct pvr_mrt_alloc_mask tile_buffers[PVR_MAX_TILE_BUFFER_COUNT];
+};
+
+/* Size in bits of the PBE accumulation format used for vk_format. */
+static uint32_t pvr_get_accum_format_bitsize(VkFormat vk_format)
+{
+   return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;
+}
+
+/**
+ * Find space in a buffer for a render target of a specified size and, if
+ * there is any, mark it as allocated.
+ *
+ * \param dev_info Device info; bounds the search and selects the alignment.
+ * \param buffer Allocation mask to search; updated on success.
+ * \param pixel_size Size of the render target in dwords. Expected to be
+ * non-zero, since the end of the range is start + pixel_size - 1.
+ * \return First dword of the newly allocated range, or -1 if nothing fits.
+ */
+static int32_t
+pvr_mrt_alloc_from_buffer(const struct pvr_device_info *dev_info,
+                          struct pvr_mrt_alloc_mask *buffer,
+                          uint32_t pixel_size)
+{
+   const uint32_t max_out_regs = rogue_get_max_output_regs_per_pixel(dev_info);
+   uint32_t alignment = 1U;
+
+   if (PVR_HAS_FEATURE(dev_info, pbe2_in_xe)) {
+      /* For a 64-bit/128-bit source format: the start offset must be even. */
+      if (pixel_size == 2U || pixel_size == 4U)
+         alignment = 2U;
+   }
+
+   assert(pixel_size <= max_out_regs);
+
+   for (uint32_t i = 0U; i <= (max_out_regs - pixel_size); i += alignment) {
+      if (!BITSET_TEST_RANGE(buffer->allocs, i, i + pixel_size - 1U)) {
+         BITSET_SET_RANGE(buffer->allocs, i, i + pixel_size - 1U);
+         return i;
+      }
+   }
+
+   return -1;
+}
+
+/**
+ * Fill out the MRT description for a render target of the given format.
+ *
+ * Writes desc->intermediate_size and sets every bit of one desc->valid_mask
+ * entry per dword of intermediate storage. Nothing else in desc is written.
+ */
+void
+pvr_init_mrt_desc(VkFormat format, struct usc_mrt_desc *desc)
+{
+   uint32_t pixel_size_in_chunks;
+   uint32_t pixel_size_in_bits;
+
+   /* TODO: Add support for packing multiple attachments into the same
+    * register
+    */
+   const uint32_t part_bits = 0;
+   if (vk_format_is_color(format) &&
+       pvr_get_pbe_accum_format(format) == PVR_PBE_ACCUM_FORMAT_INVALID) {
+      /* The VkFormat is not supported as a color attachment so `0`.
+       * vulkan doesn't seem to restrict vkCreateRenderPass() to supported
+       * formats only.
+       */
+      pixel_size_in_bits = 0;
+   } else {
+      /* TODO: handle IMG_PIXFMT_A8_UNORM
+       * For alpha only formats alpha is still placed in channel 3, so channels
+       * 0-2 need to be allocated but are left unused
+       */
+      pixel_size_in_bits = pvr_get_accum_format_bitsize(format);
+   }
+
+   desc->intermediate_size = DIV_ROUND_UP(pixel_size_in_bits, CHAR_BIT);
+
+   pixel_size_in_chunks = DIV_ROUND_UP(pixel_size_in_bits, 32U);
+   for (uint32_t j = 0U; j < pixel_size_in_chunks; j++)
+      desc->valid_mask[j] = ~0;
+
+   if (part_bits > 0U)
+      desc->valid_mask[pixel_size_in_chunks] = BITFIELD_MASK(part_bits);
+}
+
+/**
+ * Assign storage to render target rt and record it in setup->mrt_resources.
+ *
+ * Output registers are tried first, then the tile buffers already in use, and
+ * only then is a new tile buffer opened.
+ *
+ * \return VK_SUCCESS, or VK_ERROR_TOO_MANY_OBJECTS if a new tile buffer is
+ * needed but PVR_MAX_TILE_BUFFER_COUNT of them are already in use.
+ */
+static VkResult pvr_alloc_mrt(const struct pvr_device_info *dev_info,
+                              struct pvr_mrt_alloc_ctx *alloc,
+                              struct usc_mrt_setup *setup,
+                              unsigned rt,
+                              VkFormat format)
+{
+   struct usc_mrt_resource *resource = &setup->mrt_resources[rt];
+
+   const uint32_t pixel_size =
+      DIV_ROUND_UP(pvr_get_accum_format_bitsize(format), 32U);
+
+   const int32_t output_reg =
+      pvr_mrt_alloc_from_buffer(dev_info, &alloc->output_reg, pixel_size);
+
+   if (output_reg != -1) {
+      resource->type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
+      resource->reg.output_reg = output_reg;
+      resource->reg.offset = 0;
+
+      alloc->output_regs_count =
+         MAX2(alloc->output_regs_count, resource->reg.output_reg + pixel_size);
+   } else {
+      resource->type = USC_MRT_RESOURCE_TYPE_MEMORY;
+
+      unsigned tib = 0;
+      for (; tib < alloc->tile_buffers_count; tib++) {
+         struct pvr_mrt_alloc_mask *tib_alloc =
+            &alloc->tile_buffers[tib];
+
+         const int32_t tile_buffer_offset =
+            pvr_mrt_alloc_from_buffer(dev_info, tib_alloc, pixel_size);
+
+         if (tile_buffer_offset != -1) {
+            resource->mem.tile_buffer = tib;
+            resource->mem.offset_dw = tile_buffer_offset;
+            break;
+         }
+      }
+
+      /* None of the tile buffers in use had room, so open a new one. */
+      if (tib == alloc->tile_buffers_count) {
+         if (alloc->tile_buffers_count == PVR_MAX_TILE_BUFFER_COUNT)
+            return vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);
+
+         struct pvr_mrt_alloc_mask *new_tib = &alloc->tile_buffers[tib];
+
+         /* Wipe the new tile buffer and mark this render target's dwords as
+          * allocated in it. Without the mark, the next render target that
+          * needs memory would be handed the same dwords.
+          */
+         memset(new_tib, 0, sizeof(*new_tib));
+         BITSET_SET_RANGE(new_tib->allocs, 0U, pixel_size - 1U);
+
+         resource->mem.tile_buffer = tib;
+         resource->mem.offset_dw = 0;
+         alloc->tile_buffers_count = tib + 1U;
+      }
+
+      /* The hardware makes the bit depth of the on-chip storage and memory
+       * storage the same so make sure the memory storage is large enough to
+       * accommodate the largest render target.
+       */
+      alloc->output_regs_count =
+         MAX2(alloc->output_regs_count, resource->mem.offset_dw + pixel_size);
+   }
+
+   pvr_init_mrt_desc(format, &resource->mrt_desc);
+   resource->intermediate_size = resource->mrt_desc.intermediate_size;
+
+   setup->num_render_targets++;
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Fill out a usc_mrt_setup from a list of attachment formats.
+ *
+ * Any previous contents of setup are overwritten without being freed. On
+ * success the setup owns setup->mrt_resources, which is released by
+ * pvr_destroy_mrt_setup(). On failure setup is left zeroed.
+ *
+ * \param device Device providing the allocator and the device info.
+ * \param attachment_count Number of entries in attachment_formats.
+ * \param attachment_formats Format of each render target. Entries are asserted
+ * not to be VK_FORMAT_UNDEFINED.
+ * \param setup Setup to fill out.
+ * \return VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY if the resource array could
+ * not be allocated, or VK_ERROR_OUT_OF_DEVICE_MEMORY if storage could not be
+ * assigned to a render target.
+ */
+VkResult
+pvr_init_usc_mrt_setup(struct pvr_device *device,
+                       uint32_t attachment_count,
+                       const VkFormat attachment_formats[attachment_count],
+                       struct usc_mrt_setup *setup)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   struct pvr_mrt_alloc_ctx alloc = { 0 };
+   VkResult result;
+
+   memset(setup, 0, sizeof(*setup));
+
+   if (!attachment_count)
+      goto early_exit;
+
+   /* Zero-allocated because usc_mrt_desc::priority and the valid_mask entries
+    * past the pixel size are not written by anything below.
+    */
+   setup->mrt_resources =
+      vk_zalloc(&device->vk.alloc,
+                sizeof(*setup->mrt_resources) * attachment_count, 8U,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!setup->mrt_resources)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   for (unsigned i = 0; i < attachment_count; i++) {
+      VkFormat att_format = attachment_formats[i];
+      assert(att_format != VK_FORMAT_UNDEFINED);
+
+      result = pvr_alloc_mrt(dev_info, &alloc, setup, i, att_format);
+      if (result != VK_SUCCESS) {
+         result = vk_error(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+         goto fail;
+      }
+   }
+
+early_exit:
+   setup->num_output_regs = util_next_power_of_two(alloc.output_regs_count);
+   setup->num_tile_buffers = alloc.tile_buffers_count;
+   return VK_SUCCESS;
+fail:
+   vk_free(&device->vk.alloc, setup->mrt_resources);
+   /* Don't hand back a dangling pointer or a stale render target count. */
+   memset(setup, 0, sizeof(*setup));
+   return result;
+}
+
+/**
+ * Release the resource array owned by a setup that was filled out by
+ * pvr_init_usc_mrt_setup(). A NULL setup is ignored.
+ */
+void
+pvr_destroy_mrt_setup(const struct pvr_device *device,
+                      struct usc_mrt_setup *setup)
+{
+   if (!setup)
+      return;
+
+   vk_free(&device->vk.alloc, setup->mrt_resources);
+   /* Cleared so that a repeated destroy cannot free the array twice. */
+   setup->mrt_resources = NULL;
+   setup->num_render_targets = 0;
+}
diff --git a/src/imagination/vulkan/pvr_mrt.h b/src/imagination/vulkan/pvr_mrt.h
new file mode 100644
index 00000000000..f251aa1db66
--- /dev/null
+++ b/src/imagination/vulkan/pvr_mrt.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2025 Imagination Technologies Ltd.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef PVR_MRT_H
+#define PVR_MRT_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+struct pvr_device;
+
+/* Specifies the location of render target writes. */
+enum usc_mrt_resource_type {
+   USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid. */
+   USC_MRT_RESOURCE_TYPE_OUTPUT_REG,
+   USC_MRT_RESOURCE_TYPE_MEMORY,
+};
+
+#define PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS (4)
+
+struct usc_mrt_desc {
+   /* Size (in bytes) of the intermediate storage required for each pixel in the
+    * render target.
+    */
+   uint32_t intermediate_size;
+
+   /* Mask of the bits from each dword which are read by the PBE. */
+   uint32_t valid_mask[PVR_USC_RENDER_TARGET_MAXIMUM_SIZE_IN_DWORDS];
+
+   /* Higher number = higher priority. Used to decide which render targets get
+    * allocated dedicated output registers.
+    */
+   uint32_t priority;
+};
+
+struct usc_mrt_resource {
+   /* Input description of render target. */
+   struct usc_mrt_desc mrt_desc;
+
+   /* Resource type allocated for render target. */
+   enum usc_mrt_resource_type type;
+
+   /* Intermediate pixel size (in bytes). */
+   uint32_t intermediate_size;
+
+   union {
+      /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
+      struct {
+         /* The output register to use. */
+         uint32_t output_reg;
+
+         /* The offset in bytes into the output register. */
+         uint32_t offset;
+      } reg;
+
+      /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
+      struct {
+         /* The index of the tile buffer to use. */
+         uint32_t tile_buffer;
+
+         /* The offset in dwords within the tile buffer. */
+         uint32_t offset_dw;
+      } mem;
+   };
+};
+
+struct usc_mrt_setup {
+   /* Number of render targets present. */
+   uint32_t num_render_targets;
+
+   /* Number of output registers used per-pixel (1, 2 or 4). */
+   uint32_t num_output_regs;
+
+   /* Number of tile buffers used. */
+   uint32_t num_tile_buffers;
+
+   /* Size of a tile buffer in bytes. */
+   uint32_t tile_buffer_size;
+
+   /* Array of MRT resources allocated for each render target. The number of
+    * elements is determined by usc_mrt_setup::num_render_targets.
+    */
+   struct usc_mrt_resource *mrt_resources;
+
+   /* Don't set up source pos in emit. */
+   bool disable_source_pos_override;
+
+   /* Hash unique to this particular setup. */
+   uint32_t hash;
+};
+
+VkResult
+pvr_init_usc_mrt_setup(struct pvr_device *device,
+                       uint32_t attachment_count,
+                       const VkFormat attachment_formats[attachment_count],
+                       struct usc_mrt_setup *setup);
+
+void
+pvr_destroy_mrt_setup(const struct pvr_device *device,
+                      struct usc_mrt_setup *setup);
+
+void pvr_init_mrt_desc(VkFormat format, struct usc_mrt_desc *desc);
+
+#endif /* PVR_MRT_H */