diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 876bd4615d6..2e58fd58ca9 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -3899,6 +3899,48 @@ remap_attachment(struct tu_cmd_buffer *cmd, unsigned a) return i; } +struct apply_sysmem_clear_coords_state { + unsigned view; + unsigned layer; + float z_clear_val; + VkRect2D rect; +}; + +static void +fdm_apply_sysmem_clear_coords(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + void *data, + VkRect2D bin, + unsigned views, + const VkExtent2D *frag_areas) +{ + const struct apply_sysmem_clear_coords_state *state = + (const struct apply_sysmem_clear_coords_state *)data; + assert(state->view < views); + + VkExtent2D frag_area = frag_areas[state->view]; + + VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin); + + unsigned x1 = state->rect.offset.x / frag_area.width + offset.x; + unsigned x2 = DIV_ROUND_UP(state->rect.offset.x + state->rect.extent.width, + frag_area.width) + offset.x; + unsigned y1 = state->rect.offset.y / frag_area.height + offset.y; + unsigned y2 = DIV_ROUND_UP(state->rect.offset.y + state->rect.extent.height, + frag_area.height) + offset.y; + + const float coords[] = { + x1, y1, + state->z_clear_val, + uif(state->layer), + x2, y2, + state->z_clear_val, + 1.0f, + }; + + r3d_coords_raw(cmd, cs, coords); +} + template static void tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, @@ -4036,6 +4078,9 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, tu6_emit_blit_consts_load(cmd, cs, CP_LOAD_STATE6_FRAG, SB6_FS_SHADER, 0, packed_clear_value, num_rts); + if (cmd->state.fdm_enabled) + tu_cs_set_writeable(cs, true); + for (uint32_t i = 0; i < rect_count; i++) { /* This should be true because of this valid usage for * vkCmdClearAttachments: @@ -4053,18 +4098,31 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, */ for_each_layer(layer, subpass->multiview_mask, rects[i].layerCount) { - const float coords[] = { - rects[i].rect.offset.x, - rects[i].rect.offset.y, - z_clear_val, - uif(rects[i].baseArrayLayer + layer), - rects[i].rect.offset.x + rects[i].rect.extent.width, - rects[i].rect.offset.y + rects[i].rect.extent.height, - z_clear_val, - 1.0f, - }; + if (cmd->state.fdm_enabled) { + struct apply_sysmem_clear_coords_state state = { + .view = subpass->multiview_mask ? layer : 0, + .layer = rects[i].baseArrayLayer + layer, + .z_clear_val = z_clear_val, + .rect = rects[i].rect, + }; + tu_create_fdm_bin_patchpoint(cmd, cs, 4, + fdm_apply_sysmem_clear_coords, + state); + } else { + const float coords[] = { + rects[i].rect.offset.x, + rects[i].rect.offset.y, + z_clear_val, + uif(rects[i].baseArrayLayer + layer), + rects[i].rect.offset.x + rects[i].rect.extent.width, + rects[i].rect.offset.y + rects[i].rect.extent.height, + z_clear_val, + 1.0f, + }; + + r3d_coords_raw(cmd, cs, coords); + } - r3d_coords_raw(cmd, cs, coords); r3d_run_vis(cmd, cs); } } @@ -4076,6 +4134,9 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, tu_emit_event_write(cmd, cs, FD_START_PRIMITIVE_CTRS); } + if (cmd->state.fdm_enabled) + tu_cs_set_writeable(cs, false); + trace_end_sysmem_clear_all(&cmd->trace, cs); } @@ -4112,6 +4173,41 @@ clear_gmem_attachment(struct tu_cmd_buffer *cmd, tu_emit_event_write(cmd, cs, FD_BLIT); } +struct apply_gmem_clear_coords_state { + unsigned view; + VkRect2D rect; +}; + +static void +fdm_apply_gmem_clear_coords(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + void *data, + VkRect2D bin, + unsigned views, + const VkExtent2D *frag_areas) +{ + const struct apply_gmem_clear_coords_state *state = + (const struct apply_gmem_clear_coords_state *)data; + assert(state->view < views); + + VkExtent2D frag_area = frag_areas[state->view]; + + VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin); + + unsigned x1 = state->rect.offset.x / frag_area.width + offset.x; + unsigned x2 = DIV_ROUND_UP(state->rect.offset.x + state->rect.extent.width, + frag_area.width) + offset.x - 1; + unsigned y1 = state->rect.offset.y / frag_area.height + offset.y; + unsigned y2 = DIV_ROUND_UP(state->rect.offset.y + state->rect.extent.height, + frag_area.height) + offset.y - 1; + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); + tu_cs_emit(cs, + A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); + tu_cs_emit(cs, + A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); +} + template static void tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, @@ -4122,7 +4218,8 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, uint32_t layers, uint32_t layer_mask, VkImageAspectFlags mask, - const VkClearValue *value) + const VkClearValue *value, + const VkRect2D *fdm_rect) { const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[attachment]; @@ -4135,6 +4232,14 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, enum pipe_format format = vk_format_to_pipe_format(att->format); for_each_layer(i, layer_mask, layers) { uint32_t layer = i + base_layer; + if (fdm_rect) { + struct apply_gmem_clear_coords_state state = { + .view = layer, + .rect = *fdm_rect, + }; + tu_create_fdm_bin_patchpoint(cmd, cs, 3, + fdm_apply_gmem_clear_coords, state); + } if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) { uint32_t buffer_id = tu_resolve_group_include_buffer(resolve_group, VK_FORMAT_D32_SFLOAT); @@ -4174,15 +4279,33 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd, struct tu_resolve_group resolve_group = {}; + if (cmd->state.fdm_enabled) + tu_cs_set_writeable(cs, true); + for (unsigned i = 0; i < rect_count; i++) { unsigned x1 = rects[i].rect.offset.x; unsigned y1 = rects[i].rect.offset.y; unsigned x2 = x1 + rects[i].rect.extent.width - 1; unsigned y2 = y1 + rects[i].rect.extent.height - 1; - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); - tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); - tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); + const VkRect2D *fdm_rect = NULL; + if (cmd->state.fdm_enabled) { + if (!subpass->multiview_mask) { + struct apply_gmem_clear_coords_state state = { + .view = 0, + .rect = rects[i].rect, + }; + tu_create_fdm_bin_patchpoint(cmd, cs, 3, + fdm_apply_gmem_clear_coords, state); + } else { + /* We need to patch the clear rectangle for each view. */ + fdm_rect = &rects[i].rect; + } + } else { + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); + tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); + tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); + } for (unsigned j = 0; j < attachment_count; j++) { uint32_t a; @@ -4199,11 +4322,15 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd, rects[i].layerCount, subpass->multiview_mask, attachments[j].aspectMask, - &attachments[j].clearValue); + &attachments[j].clearValue, + fdm_rect); } } tu_emit_resolve_group(cmd, cs, &resolve_group); + + if (cmd->state.fdm_enabled) + tu_cs_set_writeable(cs, false); } template @@ -4277,16 +4404,29 @@ tu7_clear_attachment_generic_single_rect( const VkClearRect *rect) { const struct tu_subpass *subpass = cmd->state.subpass; - unsigned x1 = rect->rect.offset.x; - unsigned y1 = rect->rect.offset.y; - unsigned x2 = x1 + rect->rect.extent.width - 1; - unsigned y2 = y1 + rect->rect.extent.height - 1; - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); - tu_cs_emit(cs, - A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); - tu_cs_emit(cs, - A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); + if (cmd->state.fdm_enabled) { + tu_cs_set_writeable(cs, true); + + if (!subpass->multiview_mask) { + struct apply_gmem_clear_coords_state state = { + .view = 0, + .rect = rect->rect, + }; + tu_create_fdm_bin_patchpoint(cmd, cs, 3, + fdm_apply_gmem_clear_coords, state); + } + } else { + unsigned x1 = rect->rect.offset.x; + unsigned y1 = rect->rect.offset.y; + unsigned x2 = x1 + rect->rect.extent.width - 1; + unsigned y2 = y1 + rect->rect.extent.height - 1; + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); + tu_cs_emit(cs, + A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); + tu_cs_emit(cs, + A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); + } auto value = &clear_att->clearValue; @@ -4296,6 +4436,15 @@ tu7_clear_attachment_generic_single_rect( uint32_t mask = aspect_write_mask_generic_clear(format, clear_att->aspectMask); + if (cmd->state.fdm_enabled && subpass->multiview_mask) { + struct apply_gmem_clear_coords_state state = { + .view = layer, + .rect = rect->rect, + }; + tu_create_fdm_bin_patchpoint(cmd, cs, 3, + fdm_apply_gmem_clear_coords, state); + } + if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (clear_att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { uint32_t buffer_id = tu_resolve_group_include_buffer(resolve_group, VK_FORMAT_D32_SFLOAT); @@ -4312,6 +4461,9 @@ tu7_clear_attachment_generic_single_rect( tu7_generic_layer_clear(cmd, cs, buffer_id, format, mask, false, layer, value, a); } } + + if (cmd->state.fdm_enabled) + tu_cs_set_writeable(cs, false); } static void @@ -4512,7 +4664,7 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd, cmd->state.framebuffer->layers, attachment->clear_views, attachment->clear_mask, - &cmd->state.clear_values[a]); + &cmd->state.clear_values[a], NULL); } TU_GENX(tu_clear_gmem_attachment); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 5de0cf30873..110c714ef32 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -2088,6 +2088,8 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd) */ BITSET_SET(cmd->vk.dynamic_graphics_state.dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE); + + cmd->state.fdm_enabled = cmd->state.pass->has_fdm; } template @@ -2967,10 +2969,18 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer, vk_common_CmdSetRenderingAttachmentLocationsKHR(commandBuffer, location_info); } + /* Unfortunately with dynamic renderpasses we get no indication + * whether FDM is used in secondaries, so we have to assume it + * always might be enabled. + */ + cmd_buffer->state.fdm_enabled = + cmd_buffer->device->vk.enabled_features.fragmentDensityMap || + TU_DEBUG(FDM); } else { cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); cmd_buffer->state.subpass = &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; + cmd_buffer->state.fdm_enabled = cmd_buffer->state.pass->has_fdm; } tu_fill_render_pass_state(&cmd_buffer->state.vk_rp, cmd_buffer->state.pass, diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index a4d1f42a8f1..52727be352a 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -512,6 +512,8 @@ struct tu_cmd_state struct tu_lrz_state lrz; } suspended_pass; + bool fdm_enabled; + bool tessfactor_addr_set; bool predication_active; bool msaa_disable;