diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 83a3c031101..544cbc07375 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -4001,49 +4001,29 @@ try_merge_tiles(struct tu_tile_config *dst, struct tu_tile_config *src, return true; } -template -void -tu_render_pipe_fdm(struct tu_cmd_buffer *cmd, uint32_t pipe, - uint32_t tx1, uint32_t ty1, uint32_t tx2, uint32_t ty2, - const struct tu_image_view *fdm, - const VkOffset2D *fdm_offsets) +static void +tu_merge_tiles(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc, + struct tu_tile_config *tiles, + uint32_t tx1, uint32_t ty1, uint32_t tx2, uint32_t ty2) { - uint32_t width = tx2 - tx1; - uint32_t height = ty2 - ty1; - unsigned views = tu_fdm_num_layers(cmd); bool has_abs_mask = cmd->device->physical_device->info->props.has_abs_bin_mask; + unsigned views = tu_fdm_num_layers(cmd); bool shared_viewport = cmd->state.rp.shared_viewport; + uint32_t width = vsc->tile_count.width; - struct tu_tile_config tiles[width * height]; - - /* Initialize tiles and sample fragment density map */ - for (uint32_t y = 0; y < height; y++) { - for (uint32_t x = 0; x < width; x++) { - struct tu_tile_config *tile = &tiles[width * y + x]; - tile->pos = { x + tx1, y + ty1 }; - tile->sysmem_extent = { 1, 1 }; - tile->gmem_extent = { 1, 1 }; - tile->pipe = pipe; - tile->slot_mask = 1u << (width * y + x); - tile->merged_tile = NULL; - tu_calc_bin_visibility(cmd, tile, fdm_offsets); - tu_calc_frag_area(cmd, tile, fdm, fdm_offsets); - } - } - - /* Merge tiles */ - for (uint32_t y = 0; y < height; y++) { - for (uint32_t x = 0; x < width; x++) { - struct tu_tile_config *tile = &tiles[width * y + x]; + for (uint32_t y = ty1; y < ty2; y++) { + for (uint32_t x = tx1; x < tx2; x++) { + struct tu_tile_config *tile = + &tiles[width * y + x]; if (tile->visible_views == 0) continue; - if (x > 0) { + if (x > tx1) { struct tu_tile_config *prev_x_tile = &tiles[width * y + x - 1]; try_merge_tiles(tile, prev_x_tile, views, has_abs_mask, shared_viewport); } - if (y > 0) { + if (y > ty1) { unsigned prev_y_idx = width * (y - 1) + x; struct tu_tile_config *prev_y_tile = &tiles[prev_y_idx]; @@ -4057,24 +4037,59 @@ tu_render_pipe_fdm(struct tu_cmd_buffer *cmd, uint32_t pipe, } } } +} - /* Finally, iterate over tiles and draw them */ - for (uint32_t y = 0; y < height; y++) { - for (uint32_t x = 0; x < width; x++) { - uint32_t tx; - if (y & 1) - tx = width - 1 - x; - else - tx = x; +static struct tu_tile_config * +tu_calc_tile_config(struct tu_cmd_buffer *cmd, const struct tu_vsc_config *vsc, + const struct tu_image_view *fdm, const VkOffset2D *fdm_offsets) +{ + struct tu_tile_config *tiles = (struct tu_tile_config *) + calloc(vsc->tile_count.width * vsc->tile_count.height, + sizeof(struct tu_tile_config)); - unsigned tile_idx = y * width + tx; - struct tu_tile_config *tile = &tiles[tile_idx]; - if (tile->merged_tile || tile->visible_views == 0) - continue; + for (uint32_t py = 0; py < vsc->pipe_count.height; py++) { + uint32_t ty1 = py * vsc->pipe0.height; + uint32_t ty2 = MIN2(ty1 + vsc->pipe0.height, vsc->tile_count.height); + for (uint32_t px = 0; px < vsc->pipe_count.width; px++) { + uint32_t tx1 = px * vsc->pipe0.width; + uint32_t tx2 = MIN2(tx1 + vsc->pipe0.width, vsc->tile_count.width); + uint32_t pipe_width = tx2 - tx1; + uint32_t pipe = py * vsc->pipe_count.width + px; - tu6_render_tile(cmd, &cmd->cs, tile, fdm_offsets); + /* Initialize tiles and sample fragment density map */ + for (uint32_t y = ty1; y < ty2; y++) { + for (uint32_t x = tx1; x < tx2; x++) { + uint32_t tx = x - tx1; + uint32_t ty = y - ty1; + struct tu_tile_config *tile = &tiles[vsc->tile_count.width * y + x]; + + tile->pos = { x, y }; + tile->sysmem_extent = { 1, 1 }; + tile->gmem_extent = { 1, 1 }; + tile->pipe = pipe; + tile->slot_mask = 1u << (pipe_width * ty + tx); + tile->merged_tile = NULL; + tu_calc_bin_visibility(cmd, tile, fdm_offsets); + tu_calc_frag_area(cmd, tile, fdm, fdm_offsets); + } + } + + /* Merge tiles */ + /* TODO: we should also be able to merge tiles when only + * per_view_render_areas is used without FDM. That requires using + * another method to force disable draws since we don't want to force + * the viewport to be re-emitted, like overriding the view mask. It + * would also require disabling stores, and adding patchpoints for + * CmdClearAttachments in secondaries or making it use the view mask. + */ + if (!TU_DEBUG(NO_BIN_MERGING) && + cmd->device->physical_device->info->props.has_bin_mask) { + tu_merge_tiles(cmd, vsc, tiles, tx1, ty1, tx2, ty2); + } } } + + return tiles; } static VkResult @@ -4129,20 +4144,15 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd, } bool has_fdm = fdm || (TU_DEBUG(FDM) && cmd->state.pass->has_fdm); - /* TODO: we should also be able to merge tiles when only - * per_view_render_areas is used without FDM. That requires using another - * method to force disable draws since we don't want to force the viewport - * to be re-emitted, like overriding the view mask. It would also require - * disabling stores, and adding patchpoints for CmdClearAttachments in - * secondaries or making it use the view mask. - */ - bool merge_tiles = has_fdm && !TU_DEBUG(NO_BIN_MERGING) && - cmd->device->physical_device->info->props.has_bin_mask; /* If not using FDM make sure not to accidentally apply the offsets */ if (!has_fdm) fdm_offsets = NULL; + struct tu_tile_config *tiles = NULL; + if (has_fdm) + tiles = tu_calc_tile_config(cmd, vsc, fdm, fdm_offsets); + /* Create gmem stores now (at EndRenderPass time)) because they needed to * know whether to allow their conditional execution, which was tied to a * state that was known only at the end of the renderpass. They will be @@ -4171,12 +4181,6 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd, uint32_t tx2 = MIN2(tx1 + vsc->pipe0.width, vsc->tile_count.width); uint32_t ty2 = MIN2(ty1 + vsc->pipe0.height, vsc->tile_count.height); - if (merge_tiles) { - tu_render_pipe_fdm(cmd, pipe, tx1, ty1, tx2, ty2, fdm, - fdm_offsets); - continue; - } - uint32_t tile_row_stride = tx2 - tx1; uint32_t slot_row = 0; for (uint32_t ty = ty1; ty < ty2; ty++) { @@ -4187,20 +4191,24 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd, else tx = tile_row_i; - struct tu_tile_config tile = { + struct tu_tile_config _tile = { .pos = { tx1 + tx, ty }, .pipe = pipe, .slot_mask = 1u << (slot_row + tx), .sysmem_extent = { 1, 1 }, .gmem_extent = { 1, 1 }, }; - tu_calc_bin_visibility(cmd, &tile, fdm_offsets); - if (has_fdm) - tu_calc_frag_area(cmd, &tile, fdm, fdm_offsets); - else - tu_identity_frag_area(cmd, &tile); + struct tu_tile_config *tile = &_tile; + if (has_fdm) { + tile = &tiles[ty * vsc->tile_count.width + (tx1 + tx)]; + if (tile->merged_tile || !tile->visible_views) + continue; + } else { + tu_calc_bin_visibility(cmd, tile, fdm_offsets); + tu_identity_frag_area(cmd, tile); + } - tu6_render_tile(cmd, &cmd->cs, &tile, fdm_offsets); + tu6_render_tile(cmd, &cmd->cs, tile, fdm_offsets); } slot_row += tile_row_stride; } @@ -4222,6 +4230,8 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd, * does its own stores. */ tu_cs_discard_entries(&cmd->tile_store_cs); + + free(tiles); } template