diff --git a/src/freedreno/vulkan/tu_pass.cc b/src/freedreno/vulkan/tu_pass.cc index d8caa651e99..9964ea444a0 100644 --- a/src/freedreno/vulkan/tu_pass.cc +++ b/src/freedreno/vulkan/tu_pass.cc @@ -599,6 +599,53 @@ tu_render_pass_check_ib2_skip(struct tu_render_pass *pass) } } +struct tu_gmem_alloc { + uint32_t gmem_offset; + uint32_t cpp; + uint32_t first_subpass; + uint32_t last_subpass; +}; + +static struct tu_gmem_alloc * +tu_gmem_alloc(struct tu_gmem_alloc *allocs, + uint32_t *num_allocs, + uint32_t cpp, + uint32_t first_subpass, + uint32_t last_subpass) +{ + struct tu_gmem_alloc *alloc = NULL; + + /* Find an allocation that is free during our subpass range, with the + * closest usable cpp. + */ + for (int i = 0; i < *num_allocs; i++) { + if (!(allocs[i].first_subpass > last_subpass || + allocs[i].last_subpass < first_subpass)) { + continue; + } + + if (allocs[i].cpp == cpp) { + alloc = &allocs[i]; + break; + } + if (allocs[i].cpp > cpp && (!alloc || alloc->cpp > allocs[i].cpp)) + alloc = &allocs[i]; + } + if (alloc) { + /* Expand the range of the existing allocation */ + alloc->first_subpass = MIN2(alloc->first_subpass, first_subpass); + alloc->last_subpass = MAX2(alloc->last_subpass, last_subpass); + } else { + /* Make a new gmem allocation for this cpp. 
*/ + alloc = &allocs[(*num_allocs)++]; + alloc->cpp = cpp; + alloc->first_subpass = first_subpass; + alloc->last_subpass = last_subpass; + } + + return alloc; +} + static void tu_render_pass_gmem_config(struct tu_render_pass *pass, const struct tu_physical_device *phys_dev) @@ -609,24 +656,31 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass, /* log2(gmem_align/(tile_align_w*tile_align_h)) */ uint32_t block_align_shift = 3; uint32_t tile_align_w = phys_dev->info->tile_align_w; - uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * - phys_dev->info->tile_align_h; + uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * phys_dev->info->tile_align_h; + + /* gmem allocations to make, possibly shared between attachments. Each + * attachment may have 2 allocations, to handle separate stencil. + */ + struct tu_gmem_alloc gmem_alloc[2 * pass->attachment_count]; + uint32_t num_gmem_alloc = 0; + struct tu_gmem_alloc *att_gmem_alloc[2 * pass->attachment_count]; + for (int i = 0; i < ARRAY_SIZE(att_gmem_alloc); i++) + att_gmem_alloc[i] = NULL; - /* calculate total bytes per pixel */ - uint32_t cpp_total = 0; - uint32_t min_cpp = UINT32_MAX; for (uint32_t i = 0; i < pass->attachment_count; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; bool cpp1 = (att->cpp == 1); if (att->gmem) { - cpp_total += att->cpp; - min_cpp = MIN2(min_cpp, att->cpp); + att_gmem_alloc[i * 2] = + tu_gmem_alloc(gmem_alloc, &num_gmem_alloc, att->cpp, + att->first_subpass_idx, att->last_subpass_idx); /* take into account the separate stencil: */ if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - min_cpp = MIN2(min_cpp, att->samples); cpp1 = (att->samples == 1); - cpp_total += att->samples; + att_gmem_alloc[i * 2 + 1] = + tu_gmem_alloc(gmem_alloc, &num_gmem_alloc, att->samples, + att->first_subpass_idx, att->last_subpass_idx); } /* texture pitch must be aligned to 64, use a tile_align_w that is @@ -640,16 +694,22 @@ tu_render_pass_gmem_config(struct 
tu_render_pass *pass, } } + uint32_t cpp_total = 0; + uint32_t min_cpp = UINT32_MAX; + for (int i = 0; i < num_gmem_alloc; i++) { + cpp_total += gmem_alloc[i].cpp; + min_cpp = MIN2(min_cpp, gmem_alloc[i].cpp); + } + pass->tile_align_w = tile_align_w; pass->min_cpp = min_cpp; /* no gmem attachments */ if (cpp_total == 0) { - /* any value non-zero value so tiling config works with no - * attachments - */ - pass->gmem_pixels[layout] = 1024 * 1024; - continue; + /* any non-zero value so tiling config works with no attachments */ + for (int i = 0; i < ARRAY_SIZE(pass->gmem_pixels); i++) + pass->gmem_pixels[i] = 1024*1024; + return; } /* TODO: this algorithm isn't optimal @@ -662,44 +722,36 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass, : phys_dev->ccu_offset_gmem; uint32_t gmem_blocks = gmem_size / gmem_align; uint32_t offset = 0, pixels = ~0u, i; + for (i = 0; i < num_gmem_alloc; i++) { + struct tu_gmem_alloc *alloc = &gmem_alloc[i]; + + uint32_t align = MAX2(1, alloc->cpp >> block_align_shift); + uint32_t nblocks = MAX2((gmem_blocks * alloc->cpp / cpp_total) & ~(align - 1), align); + + if (nblocks > gmem_blocks) { + /* gmem layout impossible */ + pass->gmem_pixels[layout] = 0; + continue; + } + + gmem_blocks -= nblocks; + cpp_total -= alloc->cpp; + alloc->gmem_offset = offset; + offset += nblocks * gmem_align; + pixels = MIN2(pixels, nblocks * gmem_align / alloc->cpp); + } + + pass->gmem_pixels[layout] = pixels; + for (i = 0; i < pass->attachment_count; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; if (!att->gmem) continue; - att->gmem_offset[layout] = offset; - - uint32_t align = MAX2(1, att->cpp >> block_align_shift); - uint32_t nblocks = - MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align); - - if (nblocks > gmem_blocks) - break; - - gmem_blocks -= nblocks; - cpp_total -= att->cpp; - offset += nblocks * gmem_align; - pixels = MIN2(pixels, nblocks * gmem_align / att->cpp); - - /* repeat the same for separate stencil 
*/ - if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - att->gmem_offset_stencil[layout] = offset; - - /* note: for s8_uint, block align is always 1 */ - uint32_t nblocks = gmem_blocks * att->samples / cpp_total; - if (nblocks > gmem_blocks) - break; - - gmem_blocks -= nblocks; - cpp_total -= att->samples; - offset += nblocks * gmem_align; - pixels = MIN2(pixels, nblocks * gmem_align / att->samples); - } + att->gmem_offset[layout] = att_gmem_alloc[2 * i]->gmem_offset; + if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) + att->gmem_offset_stencil[layout] = att_gmem_alloc[2 * i + 1]->gmem_offset; } - - /* if the loop didn't complete then the gmem config is impossible */ - if (i == pass->attachment_count) - pass->gmem_pixels[layout] = pixels; } }