mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
tu/a7xx: Use generic clear for LOAD_OP_CLEAR
Aside from being just nicer, it does UBWC fast-clear.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30270>
This commit is contained in:
parent
49193771f6
commit
b88b076870
9 changed files with 309 additions and 150 deletions
|
|
@ -285,6 +285,9 @@ struct fd_dev_info {
|
|||
bool ubwc_all_formats_compatible;
|
||||
|
||||
bool has_compliant_dp4acc;
|
||||
|
||||
/* Whether a single clear blit can be used for both sysmem and gmem. */
|
||||
bool has_generic_clear;
|
||||
} a7xx;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -877,6 +877,7 @@ a7xx_750 = A7XXProps(
|
|||
# example dEQP-VK.image.load_store.with_format.2d.*. Disable this for
|
||||
# now.
|
||||
#supports_ibo_ubwc = True,
|
||||
has_generic_clear = True,
|
||||
gs_vpc_adjacency_quirk = True,
|
||||
storage_8bit = True,
|
||||
ubwc_all_formats_compatible = True,
|
||||
|
|
|
|||
|
|
@ -3861,9 +3861,13 @@ to upconvert to 32b float internally?
|
|||
-->
|
||||
<bitfield name="BUFFER_ID" low="12" high="15"/>
|
||||
</reg32>
|
||||
<reg32 offset="0x88e4" name="RB_UNKNOWN_88E4" variants="A7XX-" usage="rp_blit">
|
||||
<!-- Value conditioned based on predicate, changed before blits -->
|
||||
<bitfield name="UNK0" pos="0" type="boolean"/>
|
||||
|
||||
<enum name="a7xx_blit_clear_mode">
|
||||
<value value="0x0" name="CLEAR_MODE_SYSMEM"/>
|
||||
<value value="0x1" name="CLEAR_MODE_GMEM"/>
|
||||
</enum>
|
||||
<reg32 offset="0x88e4" name="RB_BLIT_CLEAR_MODE" variants="A7XX-" usage="rp_blit">
|
||||
<bitfield name="CLEAR_MODE" pos="0" type="a7xx_blit_clear_mode"/>
|
||||
</reg32>
|
||||
|
||||
<enum name="a6xx_ccu_cache_size">
|
||||
|
|
|
|||
|
|
@ -1467,6 +1467,21 @@ aspect_write_mask(enum pipe_format format, VkImageAspectFlags aspect_mask)
|
|||
return mask;
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
aspect_write_mask_generic_clear(enum pipe_format format, VkImageAspectFlags aspect_mask)
|
||||
{
|
||||
uint8_t mask = 0xf;
|
||||
assert(aspect_mask);
|
||||
/* note: the only format with partial writing is D24S8 */
|
||||
if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
|
||||
if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
mask = 0x1;
|
||||
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
mask = 0x2;
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
||||
enum r3d_blit_param {
|
||||
R3D_Z_SCALE = 1 << 0,
|
||||
R3D_DST_GMEM = 1 << 1,
|
||||
|
|
@ -1752,6 +1767,181 @@ copy_format(VkFormat vk_format, VkImageAspectFlags aspect_mask)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pack_blit_event_clear_value(const VkClearValue *val, enum pipe_format format, uint32_t clear_value[4])
|
||||
{
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
|
||||
clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) |
|
||||
val->depthStencil.stencil << 24;
|
||||
return;
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16);
|
||||
return;
|
||||
case PIPE_FORMAT_Z32_FLOAT:
|
||||
clear_value[0] = fui(val->depthStencil.depth);
|
||||
return;
|
||||
case PIPE_FORMAT_S8_UINT:
|
||||
clear_value[0] = val->depthStencil.stencil;
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
float tmp[4];
|
||||
memcpy(tmp, val->color.float32, 4 * sizeof(float));
|
||||
if (util_format_is_srgb(format)) {
|
||||
for (int i = 0; i < 3; i++)
|
||||
tmp[i] = util_format_linear_to_srgb_float(tmp[i]);
|
||||
}
|
||||
|
||||
#define PACK_F(type) util_format_##type##_pack_rgba_float \
|
||||
( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
|
||||
switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
|
||||
case 4:
|
||||
PACK_F(r4g4b4a4_unorm);
|
||||
break;
|
||||
case 5:
|
||||
if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6)
|
||||
PACK_F(r5g6b5_unorm);
|
||||
else
|
||||
PACK_F(r5g5b5a1_unorm);
|
||||
break;
|
||||
case 8:
|
||||
if (util_format_is_snorm(format))
|
||||
PACK_F(r8g8b8a8_snorm);
|
||||
else if (util_format_is_unorm(format))
|
||||
PACK_F(r8g8b8a8_unorm);
|
||||
else
|
||||
pack_int8(clear_value, val->color.uint32);
|
||||
break;
|
||||
case 10:
|
||||
if (util_format_is_pure_integer(format))
|
||||
pack_int10_2(clear_value, val->color.uint32);
|
||||
else
|
||||
PACK_F(r10g10b10a2_unorm);
|
||||
break;
|
||||
case 11:
|
||||
clear_value[0] = float3_to_r11g11b10f(val->color.float32);
|
||||
break;
|
||||
case 16:
|
||||
if (util_format_is_snorm(format))
|
||||
PACK_F(r16g16b16a16_snorm);
|
||||
else if (util_format_is_unorm(format))
|
||||
PACK_F(r16g16b16a16_unorm);
|
||||
else if (util_format_is_float(format))
|
||||
PACK_F(r16g16b16a16_float);
|
||||
else
|
||||
pack_int16(clear_value, val->color.uint32);
|
||||
break;
|
||||
case 32:
|
||||
memcpy(clear_value, val->color.float32, 4 * sizeof(float));
|
||||
break;
|
||||
case 0:
|
||||
assert(format == PIPE_FORMAT_A8_UNORM);
|
||||
PACK_F(a8_unorm);
|
||||
break;
|
||||
default:
|
||||
unreachable("unexpected channel size");
|
||||
}
|
||||
#undef PACK_F
|
||||
}
|
||||
|
||||
static void
|
||||
event_blit_setup(struct tu_cs *cs,
|
||||
const struct tu_render_pass_attachment *att,
|
||||
enum a6xx_blit_event_type blit_event_type,
|
||||
uint32_t clear_mask)
|
||||
{
|
||||
tu_cs_emit_regs(
|
||||
cs, A6XX_RB_BLIT_GMEM_MSAA_CNTL(tu_msaa_samples(att->samples)));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
|
||||
tu_cs_emit_regs(
|
||||
cs,
|
||||
A6XX_RB_BLIT_INFO(.type = blit_event_type,
|
||||
.sample_0 =
|
||||
vk_format_is_int(att->format) ||
|
||||
vk_format_is_depth_or_stencil(att->format),
|
||||
.depth = vk_format_is_depth_or_stencil(att->format),
|
||||
.clear_mask = clear_mask, ));
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
event_blit_run(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_render_pass_attachment *att,
|
||||
const struct tu_image_view *iview,
|
||||
bool separate_stencil,
|
||||
uint32_t layer)
|
||||
{
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
|
||||
if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
if (!separate_stencil) {
|
||||
tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
|
||||
tu_cs_emit_qw(
|
||||
cs, iview->depth_base_addr + iview->depth_layer_size * layer);
|
||||
tu_cs_emit(cs, A6XX_RB_2D_DST_PITCH(iview->depth_pitch).value);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
|
||||
tu_cs_image_flag_ref(cs, &iview->view, layer);
|
||||
} else {
|
||||
tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) &
|
||||
~A6XX_RB_BLIT_DST_INFO_FLAGS);
|
||||
tu_cs_emit_qw(
|
||||
cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(iview->stencil_pitch).value);
|
||||
}
|
||||
} else {
|
||||
tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
|
||||
tu_cs_image_ref_2d<CHIP>(cs, &iview->view, layer, false);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
|
||||
tu_cs_image_flag_ref(cs, &iview->view, layer);
|
||||
}
|
||||
|
||||
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT && separate_stencil) {
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_BASE_GMEM(
|
||||
tu_attachment_gmem_offset_stencil(cmd, att, layer)));
|
||||
} else {
|
||||
tu_cs_emit_regs(cs, A6XX_RB_BLIT_BASE_GMEM(
|
||||
tu_attachment_gmem_offset(cmd, att, layer)));
|
||||
}
|
||||
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_BLIT);
|
||||
}
|
||||
|
||||
static void
|
||||
tu7_generic_layer_clear(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
enum pipe_format format,
|
||||
uint8_t clear_mask,
|
||||
bool separate_stencil,
|
||||
uint32_t layer,
|
||||
const VkClearValue *value,
|
||||
uint32_t a)
|
||||
{
|
||||
const struct tu_render_pass_attachment *att =
|
||||
&cmd->state.pass->attachments[a];
|
||||
const struct tu_image_view *iview = cmd->state.attachments[a];
|
||||
|
||||
uint32_t clear_vals[4] = {};
|
||||
pack_blit_event_clear_value(value, format, clear_vals);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
|
||||
tu_cs_emit_array(cs, clear_vals, 4);
|
||||
|
||||
event_blit_setup(cs, att, BLIT_EVENT_CLEAR, clear_mask);
|
||||
event_blit_run<A7XX>(cmd, cs, att, iview, separate_stencil, layer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Copies/fills/updates for buffers are happening through CCU but need
|
||||
* additional synchronization when write range is not aligned to 64 bytes.
|
||||
* Because dst buffer access uses either R8_UNORM or R32_UINT and they are not
|
||||
|
|
@ -3132,87 +3322,6 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
|
|||
trace_end_sysmem_clear_all(&cmd->trace, cs);
|
||||
}
|
||||
|
||||
static void
|
||||
pack_gmem_clear_value(const VkClearValue *val, enum pipe_format format, uint32_t clear_value[4])
|
||||
{
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
|
||||
clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) |
|
||||
val->depthStencil.stencil << 24;
|
||||
return;
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16);
|
||||
return;
|
||||
case PIPE_FORMAT_Z32_FLOAT:
|
||||
clear_value[0] = fui(val->depthStencil.depth);
|
||||
return;
|
||||
case PIPE_FORMAT_S8_UINT:
|
||||
clear_value[0] = val->depthStencil.stencil;
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
float tmp[4];
|
||||
memcpy(tmp, val->color.float32, 4 * sizeof(float));
|
||||
if (util_format_is_srgb(format)) {
|
||||
for (int i = 0; i < 3; i++)
|
||||
tmp[i] = util_format_linear_to_srgb_float(tmp[i]);
|
||||
}
|
||||
|
||||
#define PACK_F(type) util_format_##type##_pack_rgba_float \
|
||||
( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
|
||||
switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
|
||||
case 4:
|
||||
PACK_F(r4g4b4a4_unorm);
|
||||
break;
|
||||
case 5:
|
||||
if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6)
|
||||
PACK_F(r5g6b5_unorm);
|
||||
else
|
||||
PACK_F(r5g5b5a1_unorm);
|
||||
break;
|
||||
case 8:
|
||||
if (util_format_is_snorm(format))
|
||||
PACK_F(r8g8b8a8_snorm);
|
||||
else if (util_format_is_unorm(format))
|
||||
PACK_F(r8g8b8a8_unorm);
|
||||
else
|
||||
pack_int8(clear_value, val->color.uint32);
|
||||
break;
|
||||
case 10:
|
||||
if (util_format_is_pure_integer(format))
|
||||
pack_int10_2(clear_value, val->color.uint32);
|
||||
else
|
||||
PACK_F(r10g10b10a2_unorm);
|
||||
break;
|
||||
case 11:
|
||||
clear_value[0] = float3_to_r11g11b10f(val->color.float32);
|
||||
break;
|
||||
case 16:
|
||||
if (util_format_is_snorm(format))
|
||||
PACK_F(r16g16b16a16_snorm);
|
||||
else if (util_format_is_unorm(format))
|
||||
PACK_F(r16g16b16a16_unorm);
|
||||
else if (util_format_is_float(format))
|
||||
PACK_F(r16g16b16a16_float);
|
||||
else
|
||||
pack_int16(clear_value, val->color.uint32);
|
||||
break;
|
||||
case 32:
|
||||
memcpy(clear_value, val->color.float32, 4 * sizeof(float));
|
||||
break;
|
||||
case 0:
|
||||
assert(format == PIPE_FORMAT_A8_UNORM);
|
||||
PACK_F(a8_unorm);
|
||||
break;
|
||||
default:
|
||||
unreachable("unexpected channel size");
|
||||
}
|
||||
#undef PACK_F
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
|
|
@ -3236,14 +3345,11 @@ clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit(cs, 0);
|
||||
|
||||
uint32_t clear_vals[4] = {};
|
||||
pack_gmem_clear_value(value, format, clear_vals);
|
||||
pack_blit_event_clear_value(value, format, clear_vals);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
|
||||
tu_cs_emit_array(cs, clear_vals, 4);
|
||||
|
||||
if (CHIP >= A7XX)
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_88E4(.unk0 = 1));
|
||||
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_BLIT);
|
||||
}
|
||||
|
||||
|
|
@ -3513,65 +3619,60 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
TU_GENX(tu_clear_gmem_attachment);
|
||||
|
||||
void
|
||||
tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a)
|
||||
{
|
||||
const struct tu_render_pass_attachment *att =
|
||||
&cmd->state.pass->attachments[a];
|
||||
const VkClearValue *value = &cmd->state.clear_values[a];
|
||||
const struct tu_image_view *iview = cmd->state.attachments[a];
|
||||
|
||||
trace_start_generic_clear(&cmd->trace, cs, att->format,
|
||||
iview->view.ubwc_enabled, att->samples);
|
||||
|
||||
enum pipe_format format = vk_format_to_pipe_format(att->format);
|
||||
for_each_layer(i, att->clear_views, cmd->state.framebuffer->layers) {
|
||||
uint32_t layer = i + 0;
|
||||
uint32_t mask =
|
||||
aspect_write_mask_generic_clear(format, att->clear_mask);
|
||||
if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
if (att->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||||
tu7_generic_layer_clear(cmd, cs, PIPE_FORMAT_Z32_FLOAT, mask,
|
||||
false, layer, value, a);
|
||||
}
|
||||
if (att->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
tu7_generic_layer_clear(cmd, cs, PIPE_FORMAT_S8_UINT, mask, true,
|
||||
layer, value, a);
|
||||
}
|
||||
} else {
|
||||
tu7_generic_layer_clear(cmd, cs, format, mask, false, layer, value, a);
|
||||
}
|
||||
}
|
||||
|
||||
tu_flush_for_access(&cmd->state.renderpass_cache,
|
||||
TU_ACCESS_BLIT_WRITE_GMEM, TU_ACCESS_NONE);
|
||||
|
||||
trace_end_generic_clear(&cmd->trace, cs);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
tu_emit_blit(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_image_view *iview,
|
||||
const struct tu_render_pass_attachment *attachment,
|
||||
bool resolve,
|
||||
enum a6xx_blit_event_type blit_event_type,
|
||||
bool separate_stencil)
|
||||
{
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_GMEM_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(
|
||||
.type = resolve ? BLIT_EVENT_STORE : BLIT_EVENT_LOAD,
|
||||
.sample_0 = vk_format_is_int(attachment->format) ||
|
||||
vk_format_is_depth_or_stencil(attachment->format),
|
||||
.depth = vk_format_is_depth_or_stencil(attachment->format),));
|
||||
assert(blit_event_type != BLIT_EVENT_CLEAR);
|
||||
event_blit_setup(cs, attachment, blit_event_type, 0x0);
|
||||
|
||||
for_each_layer(i, attachment->clear_views, cmd->state.framebuffer->layers) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
|
||||
if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
if (!separate_stencil) {
|
||||
tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
|
||||
tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * i);
|
||||
tu_cs_emit(cs, A6XX_RB_2D_DST_PITCH(iview->depth_pitch).value);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
|
||||
tu_cs_image_flag_ref(cs, &iview->view, i);
|
||||
} else {
|
||||
tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
|
||||
tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * i);
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(iview->stencil_pitch).value);
|
||||
}
|
||||
} else {
|
||||
tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
|
||||
tu_cs_image_ref_2d<CHIP>(cs, &iview->view, i, false);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
|
||||
tu_cs_image_flag_ref(cs, &iview->view, i);
|
||||
}
|
||||
|
||||
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && separate_stencil) {
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset_stencil(cmd, attachment, i)));
|
||||
} else {
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset(cmd, attachment, i)));
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
|
||||
if (CHIP >= A7XX)
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_88E4(.unk0 = 1));
|
||||
|
||||
tu_emit_event_write<CHIP>(cmd, cs, FD_BLIT);
|
||||
event_blit_run<CHIP>(cmd, cs, attachment, iview, separate_stencil, i);
|
||||
}
|
||||
|
||||
tu_flush_for_access(&cmd->state.cache, TU_ACCESS_BLIT_WRITE_GMEM, TU_ACCESS_NONE);
|
||||
tu_flush_for_access(&cmd->state.cache, TU_ACCESS_BLIT_WRITE_GMEM,
|
||||
TU_ACCESS_NONE);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -3796,10 +3897,10 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
load_3d_blit<CHIP>(cmd, cs, iview, attachment, true);
|
||||
} else {
|
||||
if (load_common)
|
||||
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, false, false);
|
||||
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, BLIT_EVENT_LOAD, false);
|
||||
|
||||
if (load_stencil)
|
||||
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, false, true);
|
||||
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, BLIT_EVENT_LOAD, true);
|
||||
}
|
||||
|
||||
if (cond_exec)
|
||||
|
|
@ -4115,9 +4216,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
/* use fast path when render area is aligned, except for unsupported resolve cases */
|
||||
if (use_fast_path) {
|
||||
if (store_common)
|
||||
tu_emit_blit<CHIP>(cmd, cs, iview, src, true, false);
|
||||
tu_emit_blit<CHIP>(cmd, cs, iview, src, BLIT_EVENT_STORE, false);
|
||||
if (store_separate_stencil)
|
||||
tu_emit_blit<CHIP>(cmd, cs, iview, src, true, true);
|
||||
tu_emit_blit<CHIP>(cmd, cs, iview, src, BLIT_EVENT_STORE, true);
|
||||
|
||||
if (cond_exec) {
|
||||
tu_end_load_store_cond_exec(cmd, cs, false);
|
||||
|
|
|
|||
|
|
@ -46,6 +46,11 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
struct tu_cs *cs,
|
||||
uint32_t a);
|
||||
|
||||
void
|
||||
tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t a);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
|
|
|
|||
|
|
@ -1909,6 +1909,8 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
|
||||
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_8E09(0x4));
|
||||
|
||||
tu_cs_emit_regs(cs, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_SYSMEM));
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
|
|
@ -1975,6 +1977,8 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
|
||||
tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
|
||||
tu_cs_emit_regs(cs, A7XX_RB_UNKNOWN_8E09(0x4));
|
||||
|
||||
tu_cs_emit_regs(cs, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_GMEM));
|
||||
}
|
||||
|
||||
tu_emit_cache_flush_ccu<CHIP>(cmd, cs, TU_CMD_CCU_GMEM);
|
||||
|
|
@ -4343,16 +4347,19 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd)
|
|||
}
|
||||
}
|
||||
|
||||
/* Emit gmem clears that are first used in this subpass. */
|
||||
emitted_scissor = false;
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) {
|
||||
struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[i];
|
||||
if (att->clear_mask && att->first_subpass_idx == subpass_idx) {
|
||||
if (!emitted_scissor) {
|
||||
tu6_emit_blit_scissor(cmd, cs, false);
|
||||
emitted_scissor = true;
|
||||
if (!cmd->device->physical_device->info->a7xx.has_generic_clear) {
|
||||
/* Emit gmem clears that are first used in this subpass. */
|
||||
emitted_scissor = false;
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) {
|
||||
struct tu_render_pass_attachment *att =
|
||||
&cmd->state.pass->attachments[i];
|
||||
if (att->clear_mask && att->first_subpass_idx == subpass_idx) {
|
||||
if (!emitted_scissor) {
|
||||
tu6_emit_blit_scissor(cmd, cs, false);
|
||||
emitted_scissor = true;
|
||||
}
|
||||
tu_clear_gmem_attachment<CHIP>(cmd, cs, i);
|
||||
}
|
||||
tu_clear_gmem_attachment<CHIP>(cmd, cs, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -4368,6 +4375,9 @@ template <chip CHIP>
|
|||
static void
|
||||
tu_emit_subpass_begin_sysmem(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
if (cmd->device->physical_device->info->a7xx.has_generic_clear)
|
||||
return;
|
||||
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
uint32_t subpass_idx = cmd->state.subpass - cmd->state.pass->subpasses;
|
||||
|
||||
|
|
@ -4380,6 +4390,30 @@ tu_emit_subpass_begin_sysmem(struct tu_cmd_buffer *cmd)
|
|||
tu_cond_exec_end(cs); /* sysmem */
|
||||
}
|
||||
|
||||
static void
|
||||
tu7_emit_subpass_clear(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
if (cmd->state.render_area.extent.width == 0 ||
|
||||
cmd->state.render_area.extent.height == 0)
|
||||
return;
|
||||
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
uint32_t subpass_idx = cmd->state.subpass - cmd->state.pass->subpasses;
|
||||
|
||||
bool emitted_scissor = false;
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) {
|
||||
struct tu_render_pass_attachment *att =
|
||||
&cmd->state.pass->attachments[i];
|
||||
if (att->clear_mask && att->first_subpass_idx == subpass_idx) {
|
||||
if (!emitted_scissor) {
|
||||
tu6_emit_blit_scissor(cmd, cs, false);
|
||||
emitted_scissor = true;
|
||||
}
|
||||
tu7_generic_clear_attachment(cmd, cs, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* emit loads, clears, and mrt/zs/msaa/ubwc state for the subpass that is
|
||||
* starting (either at vkCmdBeginRenderPass2() or vkCmdNextSubpass2())
|
||||
*
|
||||
|
|
@ -4395,6 +4429,9 @@ tu_emit_subpass_begin(struct tu_cmd_buffer *cmd)
|
|||
|
||||
tu_emit_subpass_begin_gmem<CHIP>(cmd);
|
||||
tu_emit_subpass_begin_sysmem<CHIP>(cmd);
|
||||
if (cmd->device->physical_device->info->a7xx.has_generic_clear) {
|
||||
tu7_emit_subpass_clear(cmd);
|
||||
}
|
||||
|
||||
tu6_emit_zs<CHIP>(cmd, cmd->state.subpass, &cmd->draw_cs);
|
||||
tu6_emit_mrt<CHIP>(cmd, cmd->state.subpass, &cmd->draw_cs);
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ enum tu_stage_id {
|
|||
COMPUTE_STAGE_ID,
|
||||
CLEAR_SYSMEM_STAGE_ID,
|
||||
CLEAR_GMEM_STAGE_ID,
|
||||
GENERIC_CLEAR_STAGE_ID,
|
||||
GMEM_LOAD_STAGE_ID,
|
||||
GMEM_STORE_STAGE_ID,
|
||||
SYSMEM_RESOLVE_STAGE_ID,
|
||||
|
|
@ -81,6 +82,7 @@ static const struct {
|
|||
[COMPUTE_STAGE_ID] = { "Compute", "Compute job" },
|
||||
[CLEAR_SYSMEM_STAGE_ID] = { "Clear Sysmem", "" },
|
||||
[CLEAR_GMEM_STAGE_ID] = { "Clear GMEM", "Per-tile (GMEM) clear" },
|
||||
[GENERIC_CLEAR_STAGE_ID] = { "Clear Sysmem/Gmem", ""},
|
||||
[GMEM_LOAD_STAGE_ID] = { "GMEM Load", "Per tile system memory to GMEM load" },
|
||||
[GMEM_STORE_STAGE_ID] = { "GMEM Store", "Per tile GMEM to system memory store" },
|
||||
[SYSMEM_RESOLVE_STAGE_ID] = { "SysMem Resolve", "System memory MSAA resolve" },
|
||||
|
|
@ -500,6 +502,7 @@ CREATE_EVENT_CALLBACK(draw_ib_sysmem, BYPASS_STAGE_ID)
|
|||
CREATE_EVENT_CALLBACK(blit, BLIT_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(compute, COMPUTE_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(compute_indirect, COMPUTE_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(generic_clear, GENERIC_CLEAR_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(gmem_clear, CLEAR_GMEM_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(sysmem_clear, CLEAR_SYSMEM_STAGE_ID)
|
||||
CREATE_EVENT_CALLBACK(sysmem_clear_all, CLEAR_SYSMEM_STAGE_ID)
|
||||
|
|
|
|||
|
|
@ -101,6 +101,11 @@ begin_end_tp('binning_ib')
|
|||
begin_end_tp('draw_ib_sysmem')
|
||||
begin_end_tp('draw_ib_gmem')
|
||||
|
||||
begin_end_tp('generic_clear',
|
||||
args=[Arg(type='enum VkFormat', var='format', c_format='%s', to_prim_type='vk_format_description({})->short_name'),
|
||||
Arg(type='bool', var='ubwc', c_format='%s', to_prim_type='({} ? "true" : "false")'),
|
||||
Arg(type='uint8_t', var='samples', c_format='%u')])
|
||||
|
||||
begin_end_tp('gmem_clear',
|
||||
args=[Arg(type='enum VkFormat', var='format', c_format='%s', to_prim_type='vk_format_description({})->short_name'),
|
||||
Arg(type='uint8_t', var='samples', c_format='%u')])
|
||||
|
|
|
|||
|
|
@ -1342,7 +1342,7 @@ emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,
|
|||
}
|
||||
|
||||
if (CHIP >= A7XX)
|
||||
OUT_REG(ring, A7XX_RB_UNKNOWN_88E4(.unk0 = 1));
|
||||
OUT_REG(ring, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_GMEM));
|
||||
|
||||
fd6_emit_blit<CHIP>(batch->ctx, ring);
|
||||
}
|
||||
|
|
@ -1444,7 +1444,7 @@ emit_subpass_clears(struct fd_batch *batch, struct fd_batch_subpass *subpass)
|
|||
OUT_RING(ring, uc.ui[3]);
|
||||
|
||||
if (CHIP >= A7XX)
|
||||
OUT_REG(ring, A7XX_RB_UNKNOWN_88E4(.unk0 = 1));
|
||||
OUT_REG(ring, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_GMEM));
|
||||
|
||||
fd6_emit_blit<CHIP>(batch->ctx, ring);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue