diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index 229740d424f..14a97cc6539 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -1181,11 +1181,6 @@ emit_clip_window(struct v3dv_job *job, const VkRect2D *rect) } } -/* Checks whether the render area rectangle covers a region that is aligned to - * tile boundaries, which means that for all tiles covered by the render area - * region, there are no uncovered pixels (unless they are also outside the - * framebuffer). - */ static void cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer) { @@ -1200,24 +1195,11 @@ cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer) * always have framebuffer information available. */ assert(cmd_buffer->state.framebuffer); - - const VkExtent2D fb_extent = { - .width = cmd_buffer->state.framebuffer->width, - .height = cmd_buffer->state.framebuffer->height - }; - - VkExtent2D granularity; - v3dv_subpass_get_granularity(cmd_buffer->state.pass, - cmd_buffer->state.subpass_idx, - &granularity); - cmd_buffer->state.tile_aligned_render_area = - rect->offset.x % granularity.width == 0 && - rect->offset.y % granularity.height == 0 && - (rect->extent.width % granularity.width == 0 || - rect->offset.x + rect->extent.width >= fb_extent.width) && - (rect->extent.height % granularity.height == 0 || - rect->offset.y + rect->extent.height >= fb_extent.height); + v3dv_subpass_area_is_tile_aligned(rect, + cmd_buffer->state.framebuffer, + cmd_buffer->state.pass, + cmd_buffer->state.subpass_idx); if (!cmd_buffer->state.tile_aligned_render_area) { perf_debug("Render area for subpass %d of render pass %p doesn't " @@ -2023,7 +2005,6 @@ cmd_buffer_emit_render_pass_rcl(struct v3dv_cmd_buffer *cmd_buffer) assert(state->subpass_idx < state->pass->subpass_count); const struct v3dv_render_pass *pass = state->pass; const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; - struct 
v3dv_cl *rcl = &job->rcl; /* Comon config must be the first TILE_RENDERING_MODE_CFG and @@ -2031,7 +2012,6 @@ cmd_buffer_emit_render_pass_rcl(struct v3dv_cmd_buffer *cmd_buffer) * updates to the previous HW state. */ const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { config.image_width_pixels = framebuffer->width; config.image_height_pixels = framebuffer->height; diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 36d74b8c655..6e0eefb031d 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -1977,6 +1977,8 @@ v3dv_CreateFramebuffer(VkDevice _device, framebuffer->width = pCreateInfo->width; framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; + framebuffer->has_edge_padding = true; + framebuffer->attachment_count = pCreateInfo->attachmentCount; framebuffer->color_attachment_count = 0; for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { diff --git a/src/broadcom/vulkan/v3dv_meta_copy.c b/src/broadcom/vulkan/v3dv_meta_copy.c index fe9e6a0c5c5..0ba96b84257 100644 --- a/src/broadcom/vulkan/v3dv_meta_copy.c +++ b/src/broadcom/vulkan/v3dv_meta_copy.c @@ -61,6 +61,7 @@ v3dv_meta_blit_finish(struct v3dv_device *device) struct v3dv_meta_blit_pipeline *item = entry->data; v3dv_DestroyPipeline(_device, item->pipeline, &device->alloc); v3dv_DestroyRenderPass(_device, item->pass, &device->alloc); + v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->alloc); vk_free(&device->alloc, item); } _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL); @@ -771,7 +772,8 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, VkColorComponentFlags cmask, VkComponentMapping *cswizzle, const VkImageBlit *region, - VkFilter filter); + VkFilter filter, + bool dst_is_padded_image); /** * Returns true if the implementation supports the requested operation (even if @@ -998,7 +1000,7 @@ 
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, v3dv_image_from_handle(buffer_image), dst_format, image, src_format, cmask, &cswizzle, - &blit_region, VK_FILTER_NEAREST); + &blit_region, VK_FILTER_NEAREST, false); if (!handled) { /* This is unexpected, we should have a supported blit spec */ unreachable("Unable to blit buffer to destination image"); @@ -1454,7 +1456,7 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, dst, format, src, format, 0, NULL, - &blit_region, VK_FILTER_NEAREST); + &blit_region, VK_FILTER_NEAREST, true); /* We should have selected formats that we can blit */ assert(handled); @@ -2693,7 +2695,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, image, dst_format, v3dv_image_from_handle(buffer_image), src_format, cmask, NULL, - &blit_region, VK_FILTER_NEAREST); + &blit_region, VK_FILTER_NEAREST, true); if (!handled) { /* This is unexpected, we should have a supported blit spec */ unreachable("Unable to blit buffer to destination image"); @@ -3101,20 +3103,15 @@ static bool create_blit_render_pass(struct v3dv_device *device, VkFormat dst_format, VkFormat src_format, - VkRenderPass *pass) + VkRenderPass *pass_load, + VkRenderPass *pass_no_load) { const bool is_color_blit = vk_format_is_color(dst_format); - /* FIXME: if blitting to tile boundaries or to the whole image, we could - * use LOAD_DONT_CARE, but then we would have to include that in the - * pipeline hash key. 
Or maybe we should just create both render passes and - * use one or the other at draw time since they would both be compatible - * with the pipeline anyway - */ + /* Attachment load operation is specified below */ VkAttachmentDescription att = { .format = dst_format, .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_GENERAL, .finalLayout = VK_IMAGE_LAYOUT_GENERAL, @@ -3146,8 +3143,16 @@ create_blit_render_pass(struct v3dv_device *device, .pDependencies = NULL, }; - VkResult result = v3dv_CreateRenderPass(v3dv_device_to_handle(device), - &info, &device->alloc, pass); + VkResult result; + att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + result = v3dv_CreateRenderPass(v3dv_device_to_handle(device), + &info, &device->alloc, pass_load); + if (result != VK_SUCCESS) + return false; + + att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + result = v3dv_CreateRenderPass(v3dv_device_to_handle(device), + &info, &device->alloc, pass_no_load); return result == VK_SUCCESS; } @@ -3763,10 +3768,14 @@ get_blit_pipeline(struct v3dv_device *device, goto fail; ok = create_blit_render_pass(device, dst_format, src_format, - &(*pipeline)->pass); + &(*pipeline)->pass, + &(*pipeline)->pass_no_load); if (!ok) goto fail; + /* Create the pipeline using one of the render passes, they are both + * compatible, so we don't care which one we use here. 
+ */ ok = create_blit_pipeline(device, dst_format, src_format, @@ -3794,6 +3803,8 @@ fail: if (*pipeline) { if ((*pipeline)->pass) v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->alloc); + if ((*pipeline)->pass_no_load) + v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->alloc); if ((*pipeline)->pipeline) v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->alloc); vk_free(&device->alloc, *pipeline); @@ -3896,7 +3907,8 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, VkColorComponentFlags cmask, VkComponentMapping *cswizzle, const VkImageBlit *_region, - VkFilter filter) + VkFilter filter, + bool dst_is_padded_image) { bool handled = true; @@ -3907,7 +3919,6 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, !vk_format_is_depth_or_stencil(dst_format)); VkImageBlit region = *_region; - /* Rewrite combined D/S blits to compatible color blits */ if (vk_format_is_depth_or_stencil(dst_format)) { assert(src_format == dst_format); @@ -3940,12 +3951,12 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; } - if (cmask == 0) { - cmask = VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | - VK_COLOR_COMPONENT_A_BIT; - } + const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT; + if (cmask == 0) + cmask = full_cmask; VkComponentMapping ident_swizzle = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -4072,7 +4083,8 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, &pipeline); if (!ok) return handled; - assert(pipeline && pipeline->pipeline && pipeline->pass); + assert(pipeline && pipeline->pipeline && + pipeline->pass && pipeline->pass_no_load); struct v3dv_device *device = cmd_buffer->device; assert(cmd_buffer->meta.blit.dspool); @@ -4128,6 +4140,11 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, if (result != VK_SUCCESS) goto fail; + struct 
v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb); + framebuffer->has_edge_padding = fb_info.width == dst_level_w && + fb_info.height == dst_level_h && + dst_is_padded_image; + v3dv_cmd_buffer_add_private_obj( cmd_buffer, (uintptr_t)fb, (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer); @@ -4208,15 +4225,30 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, }; v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL); + /* If the region we are about to blit is tile-aligned, then we can + * use the render pass version that won't pre-load the tile buffer + * with the dst image contents before the blit. The exception is when we + * don't have a full color mask, since in that case we need to preserve + * the original value of some of the color components. + */ + const VkRect2D render_area = { + .offset = { dst_x, dst_y }, + .extent = { dst_w, dst_h }, + }; + struct v3dv_render_pass *pipeline_pass = + v3dv_render_pass_from_handle(pipeline->pass); + bool can_skip_tlb_load = + cmask == full_cmask && + v3dv_subpass_area_is_tile_aligned(&render_area, framebuffer, + pipeline_pass, 0); + /* Record blit */ VkRenderPassBeginInfo rp_info = { .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = pipeline->pass, + .renderPass = can_skip_tlb_load ? 
pipeline->pass_no_load : + pipeline->pass, .framebuffer = fb, - .renderArea = { - .offset = { dst_x, dst_y }, - .extent = { dst_w, dst_h } - }, + .renderArea = render_area, .clearValueCount = 0, }; @@ -4308,7 +4340,7 @@ v3dv_CmdBlitImage(VkCommandBuffer commandBuffer, dst, dst->vk_format, src, src->vk_format, 0, NULL, - &pRegions[i], filter)) { + &pRegions[i], filter, true)) { continue; } unreachable("Unsupported blit operation"); @@ -4469,7 +4501,7 @@ resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer, dst, dst->vk_format, src, src->vk_format, 0, NULL, - &blit_region, VK_FILTER_NEAREST); + &blit_region, VK_FILTER_NEAREST, true); } void diff --git a/src/broadcom/vulkan/v3dv_pass.c b/src/broadcom/vulkan/v3dv_pass.c index 35f9c614289..a030b1c4e1d 100644 --- a/src/broadcom/vulkan/v3dv_pass.c +++ b/src/broadcom/vulkan/v3dv_pass.c @@ -255,10 +255,10 @@ v3dv_DestroyRenderPass(VkDevice _device, vk_free2(&device->alloc, pAllocator, pass); } -void -v3dv_subpass_get_granularity(struct v3dv_render_pass *pass, - uint32_t subpass_idx, - VkExtent2D *granularity) +static void +subpass_get_granularity(struct v3dv_render_pass *pass, + uint32_t subpass_idx, + VkExtent2D *granularity) { static const uint8_t tile_sizes[] = { 64, 64, @@ -321,8 +321,50 @@ v3dv_GetRenderAreaGranularity(VkDevice device, for (uint32_t i = 0; i < pass->subpass_count; i++) { VkExtent2D sg; - v3dv_subpass_get_granularity(pass, i, &sg); + subpass_get_granularity(pass, i, &sg); pGranularity->width = MIN2(pGranularity->width, sg.width); pGranularity->height = MIN2(pGranularity->height, sg.height); } } + +/* Checks whether the render area rectangle covers a region that is aligned to + * tile boundaries. This means that we are writing to all pixels covered by + * all tiles in that area (except for pixels on edge tiles that are outside + * the framebuffer dimensions). 
+ * + * When our framebuffer is aligned to tile boundaries we know we are writing + * valid data to all pixels in each tile and we can apply certain + * optimizations, like avoiding tile loads, since we know that none of the + * original pixel values in each tile for that area need to be preserved. + * We also use this to decide if we can use TLB clears, as these clear whole + * tiles so we can't use them if the render area is not aligned. + * + * Note that when an image is created it will possibly include padding blocks + * depending on its tiling layout. When the framebuffer dimensions are not + * aligned to tile boundaries then edge tiles are only partially covered by the + * framebuffer pixels, but tile stores still seem to store full tiles + * writing to the padded sections. This is important when the framebuffer + * is aliasing a smaller section of a larger image, as in that case the edge + * tiles of the framebuffer would overwrite valid pixels in the larger image. + * In that case, we can't flag the area as being aligned. 
+ */ +bool +v3dv_subpass_area_is_tile_aligned(const VkRect2D *area, + struct v3dv_framebuffer *fb, + struct v3dv_render_pass *pass, + uint32_t subpass_idx) +{ + assert(subpass_idx < pass->subpass_count); + + VkExtent2D granularity; + subpass_get_granularity(pass, subpass_idx, &granularity); + + return area->offset.x % granularity.width == 0 && + area->offset.y % granularity.height == 0 && + (area->extent.width % granularity.width == 0 || + (fb->has_edge_padding && + area->offset.x + area->extent.width >= fb->width)) && + (area->extent.height % granularity.height == 0 || + (fb->has_edge_padding && + area->offset.y + area->extent.height >= fb->height)); +} diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 0528111148b..caa7ea699b5 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -257,6 +257,7 @@ struct v3dv_meta_depth_clear_pipeline { struct v3dv_meta_blit_pipeline { VkPipeline pipeline; VkRenderPass pass; + VkRenderPass pass_no_load; uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE]; }; @@ -555,15 +556,22 @@ struct v3dv_render_pass { struct v3dv_subpass_attachment *subpass_attachments; }; -void v3dv_subpass_get_granularity(struct v3dv_render_pass *pass, - uint32_t subpass_idx, - VkExtent2D *granularity); - struct v3dv_framebuffer { uint32_t width; uint32_t height; uint32_t layers; + /* Typically, edge tiles in the framebuffer have padding depending on the + * underlying tiling layout. One consequence of this is that when the + * framebuffer dimensions are not aligned to tile boundaries, tile stores + * would still write full tiles on the edges and write to the padded area. + * If the framebuffer is aliasing a smaller region of a larger image, then + * we need to be careful with this though, as we won't have padding on the + * edge tiles (which typically means that we need to load the tile buffer + * before we store). 
+ */ + bool has_edge_padding; + uint32_t attachment_count; uint32_t color_attachment_count; struct v3dv_image_view *attachments[0]; @@ -590,6 +598,10 @@ void v3dv_framebuffer_compute_internal_bpp_msaa(const struct v3dv_framebuffer *f const struct v3dv_subpass *subpass, uint8_t *max_bpp, bool *msaa); +bool v3dv_subpass_area_is_tile_aligned(const VkRect2D *area, + struct v3dv_framebuffer *fb, + struct v3dv_render_pass *pass, + uint32_t subpass_idx); struct v3dv_cmd_pool { VkAllocationCallbacks alloc; struct list_head cmd_buffers;