tu: Fix CmdClearAttachments with fragment density map

The clear may be a partial clear, in which case we need to make sure
that the clear rectangle is transformed into GMEM space so that it is
clipped correctly.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33500>
This commit is contained in:
Connor Abbott 2025-01-30 13:48:44 -05:00 committed by Marge Bot
parent 0d4eed0e46
commit df0c17f76e
3 changed files with 190 additions and 26 deletions

View file

@ -3899,6 +3899,48 @@ remap_attachment(struct tu_cmd_buffer *cmd, unsigned a)
return i;
}
struct apply_sysmem_clear_coords_state {
unsigned view;
unsigned layer;
float z_clear_val;
VkRect2D rect;
};
static void
fdm_apply_sysmem_clear_coords(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
void *data,
VkRect2D bin,
unsigned views,
const VkExtent2D *frag_areas)
{
const struct apply_sysmem_clear_coords_state *state =
(const struct apply_sysmem_clear_coords_state *)data;
assert(state->view < views);
VkExtent2D frag_area = frag_areas[state->view];
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin);
unsigned x1 = state->rect.offset.x / frag_area.width + offset.x;
unsigned x2 = DIV_ROUND_UP(state->rect.offset.x + state->rect.extent.width,
frag_area.width) + offset.x;
unsigned y1 = state->rect.offset.y / frag_area.height + offset.y;
unsigned y2 = DIV_ROUND_UP(state->rect.offset.y + state->rect.extent.height,
frag_area.height) + offset.y;
const float coords[] = {
x1, y1,
state->z_clear_val,
uif(state->layer),
x2, y2,
state->z_clear_val,
1.0f,
};
r3d_coords_raw(cmd, cs, coords);
}
template <chip CHIP>
static void
tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
@ -4036,6 +4078,9 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
tu6_emit_blit_consts_load(cmd, cs, CP_LOAD_STATE6_FRAG, SB6_FS_SHADER,
0, packed_clear_value, num_rts);
if (cmd->state.fdm_enabled)
tu_cs_set_writeable(cs, true);
for (uint32_t i = 0; i < rect_count; i++) {
/* This should be true because of this valid usage for
* vkCmdClearAttachments:
@ -4053,18 +4098,31 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
*/
for_each_layer(layer, subpass->multiview_mask, rects[i].layerCount)
{
const float coords[] = {
rects[i].rect.offset.x,
rects[i].rect.offset.y,
z_clear_val,
uif(rects[i].baseArrayLayer + layer),
rects[i].rect.offset.x + rects[i].rect.extent.width,
rects[i].rect.offset.y + rects[i].rect.extent.height,
z_clear_val,
1.0f,
};
if (cmd->state.fdm_enabled) {
struct apply_sysmem_clear_coords_state state = {
.view = subpass->multiview_mask ? layer : 0,
.layer = rects[i].baseArrayLayer + layer,
.z_clear_val = z_clear_val,
.rect = rects[i].rect,
};
tu_create_fdm_bin_patchpoint(cmd, cs, 4,
fdm_apply_sysmem_clear_coords,
state);
} else {
const float coords[] = {
rects[i].rect.offset.x,
rects[i].rect.offset.y,
z_clear_val,
uif(rects[i].baseArrayLayer + layer),
rects[i].rect.offset.x + rects[i].rect.extent.width,
rects[i].rect.offset.y + rects[i].rect.extent.height,
z_clear_val,
1.0f,
};
r3d_coords_raw(cmd, cs, coords);
}
r3d_coords_raw(cmd, cs, coords);
r3d_run_vis(cmd, cs);
}
}
@ -4076,6 +4134,9 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
tu_emit_event_write<CHIP>(cmd, cs, FD_START_PRIMITIVE_CTRS);
}
if (cmd->state.fdm_enabled)
tu_cs_set_writeable(cs, false);
trace_end_sysmem_clear_all(&cmd->trace, cs);
}
@ -4112,6 +4173,41 @@ clear_gmem_attachment(struct tu_cmd_buffer *cmd,
tu_emit_event_write<CHIP>(cmd, cs, FD_BLIT);
}
struct apply_gmem_clear_coords_state {
unsigned view;
VkRect2D rect;
};
static void
fdm_apply_gmem_clear_coords(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
void *data,
VkRect2D bin,
unsigned views,
const VkExtent2D *frag_areas)
{
const struct apply_gmem_clear_coords_state *state =
(const struct apply_gmem_clear_coords_state *)data;
assert(state->view < views);
VkExtent2D frag_area = frag_areas[state->view];
VkOffset2D offset = tu_fdm_per_bin_offset(frag_area, bin);
unsigned x1 = state->rect.offset.x / frag_area.width + offset.x;
unsigned x2 = DIV_ROUND_UP(state->rect.offset.x + state->rect.extent.width,
frag_area.width) + offset.x - 1;
unsigned y1 = state->rect.offset.y / frag_area.height + offset.y;
unsigned y2 = DIV_ROUND_UP(state->rect.offset.y + state->rect.extent.height,
frag_area.height) + offset.y - 1;
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
tu_cs_emit(cs,
A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
tu_cs_emit(cs,
A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
}
template <chip CHIP>
static void
tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
@ -4122,7 +4218,8 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
uint32_t layers,
uint32_t layer_mask,
VkImageAspectFlags mask,
const VkClearValue *value)
const VkClearValue *value,
const VkRect2D *fdm_rect)
{
const struct tu_render_pass_attachment *att =
&cmd->state.pass->attachments[attachment];
@ -4135,6 +4232,14 @@ tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
enum pipe_format format = vk_format_to_pipe_format(att->format);
for_each_layer(i, layer_mask, layers) {
uint32_t layer = i + base_layer;
if (fdm_rect) {
struct apply_gmem_clear_coords_state state = {
.view = layer,
.rect = *fdm_rect,
};
tu_create_fdm_bin_patchpoint(cmd, cs, 3,
fdm_apply_gmem_clear_coords, state);
}
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
uint32_t buffer_id = tu_resolve_group_include_buffer<CHIP>(resolve_group, VK_FORMAT_D32_SFLOAT);
@ -4174,15 +4279,33 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
struct tu_resolve_group resolve_group = {};
if (cmd->state.fdm_enabled)
tu_cs_set_writeable(cs, true);
for (unsigned i = 0; i < rect_count; i++) {
unsigned x1 = rects[i].rect.offset.x;
unsigned y1 = rects[i].rect.offset.y;
unsigned x2 = x1 + rects[i].rect.extent.width - 1;
unsigned y2 = y1 + rects[i].rect.extent.height - 1;
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
const VkRect2D *fdm_rect = NULL;
if (cmd->state.fdm_enabled) {
if (!subpass->multiview_mask) {
struct apply_gmem_clear_coords_state state = {
.view = 0,
.rect = rects[i].rect,
};
tu_create_fdm_bin_patchpoint(cmd, cs, 3,
fdm_apply_gmem_clear_coords, state);
} else {
/* We need to patch the clear rectangle for each view. */
fdm_rect = &rects[i].rect;
}
} else {
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
}
for (unsigned j = 0; j < attachment_count; j++) {
uint32_t a;
@ -4199,11 +4322,15 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
rects[i].layerCount,
subpass->multiview_mask,
attachments[j].aspectMask,
&attachments[j].clearValue);
&attachments[j].clearValue,
fdm_rect);
}
}
tu_emit_resolve_group<CHIP>(cmd, cs, &resolve_group);
if (cmd->state.fdm_enabled)
tu_cs_set_writeable(cs, false);
}
template <chip CHIP>
@ -4277,16 +4404,29 @@ tu7_clear_attachment_generic_single_rect(
const VkClearRect *rect)
{
const struct tu_subpass *subpass = cmd->state.subpass;
unsigned x1 = rect->rect.offset.x;
unsigned y1 = rect->rect.offset.y;
unsigned x2 = x1 + rect->rect.extent.width - 1;
unsigned y2 = y1 + rect->rect.extent.height - 1;
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
tu_cs_emit(cs,
A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
tu_cs_emit(cs,
A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
if (cmd->state.fdm_enabled) {
tu_cs_set_writeable(cs, true);
if (!subpass->multiview_mask) {
struct apply_gmem_clear_coords_state state = {
.view = 0,
.rect = rect->rect,
};
tu_create_fdm_bin_patchpoint(cmd, cs, 3,
fdm_apply_gmem_clear_coords, state);
}
} else {
unsigned x1 = rect->rect.offset.x;
unsigned y1 = rect->rect.offset.y;
unsigned x2 = x1 + rect->rect.extent.width - 1;
unsigned y2 = y1 + rect->rect.extent.height - 1;
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
tu_cs_emit(cs,
A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
tu_cs_emit(cs,
A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
}
auto value = &clear_att->clearValue;
@ -4296,6 +4436,15 @@ tu7_clear_attachment_generic_single_rect(
uint32_t mask =
aspect_write_mask_generic_clear(format, clear_att->aspectMask);
if (cmd->state.fdm_enabled && subpass->multiview_mask) {
struct apply_gmem_clear_coords_state state = {
.view = layer,
.rect = rect->rect,
};
tu_create_fdm_bin_patchpoint(cmd, cs, 3,
fdm_apply_gmem_clear_coords, state);
}
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
if (clear_att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
uint32_t buffer_id = tu_resolve_group_include_buffer<A7XX>(resolve_group, VK_FORMAT_D32_SFLOAT);
@ -4312,6 +4461,9 @@ tu7_clear_attachment_generic_single_rect(
tu7_generic_layer_clear(cmd, cs, buffer_id, format, mask, false, layer, value, a);
}
}
if (cmd->state.fdm_enabled)
tu_cs_set_writeable(cs, false);
}
static void
@ -4512,7 +4664,7 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
cmd->state.framebuffer->layers,
attachment->clear_views,
attachment->clear_mask,
&cmd->state.clear_values[a]);
&cmd->state.clear_values[a], NULL);
}
TU_GENX(tu_clear_gmem_attachment);

View file

@ -2088,6 +2088,8 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd)
*/
BITSET_SET(cmd->vk.dynamic_graphics_state.dirty,
MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE);
cmd->state.fdm_enabled = cmd->state.pass->has_fdm;
}
template <chip CHIP>
@ -2967,10 +2969,18 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
vk_common_CmdSetRenderingAttachmentLocationsKHR(commandBuffer,
location_info);
}
/* Unfortunately with dynamic renderpasses we get no indication
* whether FDM is used in secondaries, so we have to assume it
* always might be enabled.
*/
cmd_buffer->state.fdm_enabled =
cmd_buffer->device->vk.enabled_features.fragmentDensityMap ||
TU_DEBUG(FDM);
} else {
cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
cmd_buffer->state.subpass =
&cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
cmd_buffer->state.fdm_enabled = cmd_buffer->state.pass->has_fdm;
}
tu_fill_render_pass_state(&cmd_buffer->state.vk_rp,
cmd_buffer->state.pass,

View file

@ -512,6 +512,8 @@ struct tu_cmd_state
struct tu_lrz_state lrz;
} suspended_pass;
bool fdm_enabled;
bool tessfactor_addr_set;
bool predication_active;
bool msaa_disable;