tu: Use CHIP variant reg builders

Avoid using the non-variant builders for regs that differ between
generations.  The non-variant builders will become deprecated.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30452>
This commit is contained in:
Rob Clark 2024-07-31 13:29:11 -07:00 committed by Marge Bot
parent 47468554d9
commit 75e0290e42
4 changed files with 74 additions and 49 deletions

View file

@ -1320,6 +1320,7 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
}
template <chip CHIP>
static void
r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
enum pipe_format src_format)
@ -1333,10 +1334,14 @@ r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
mrt_buf_info =
(mrt_buf_info & ~A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK) |
A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(fmt);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
tu_cs_emit(cs, mrt_buf_info);
tu_cs_image_ref(cs, iview, layer);
tu_cs_emit(cs, 0);
tu_cs_emit_regs(cs,
RB_MRT_BUF_INFO(CHIP, 0, .dword = mrt_buf_info),
A6XX_RB_MRT_PITCH(0, iview->pitch),
A6XX_RB_MRT_ARRAY_PITCH(0, iview->layer_size),
A6XX_RB_MRT_BASE(0, .qword = tu_layer_address(iview, layer)),
A6XX_RB_MRT_BASE_GMEM(0),
);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
tu_cs_image_flag_ref(cs, iview, layer);
@ -1346,37 +1351,46 @@ r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
*/
tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = fmt));
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP, .flag_mrts = iview->ubwc_enabled));
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL());
}
/* Set up MRT 0 as the destination for the depth data of `iview` at `layer`.
 *
 * Register writes visible here, in order:
 *  - RB_MRT_BUF_INFO(0) from tu_image_view_depth(iview, RB_MRT_BUF_INFO),
 *    followed by pitch (iview->depth_pitch), array pitch
 *    (iview->depth_layer_size), base address
 *    (iview->depth_base_addr + iview->depth_layer_size * layer) and
 *    RB_MRT_BASE_GMEM(0) with no fields set;
 *  - the three-dword RB_MRT_FLAG_BUFFER(0) group via tu_cs_image_flag_ref();
 *  - RB_RENDER_CNTL with flag_mrts taken from iview->view.ubwc_enabled;
 *  - A7XX_GRAS_SU_RENDER_CNTL with no fields set.
 *
 * NOTE(review): this listing shows both a raw tu_cs_emit_pkt4()/tu_cs_emit()
 * sequence and a tu_cs_emit_regs() sequence for the same registers, and both
 * an A6XX_RB_RENDER_CNTL and a CHIP-variant RB_RENDER_CNTL write. These look
 * like the before/after sides of the change shown together -- confirm which
 * lines are live before relying on this description.
 */
template <chip CHIP>
static void
r3d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
tu_cs_emit(cs, tu_image_view_depth(iview, RB_MRT_BUF_INFO));
tu_cs_image_depth_ref(cs, iview, layer);
tu_cs_emit(cs, 0);
/* Builder form: RB_MRT_BUF_INFO is selected by CHIP, the rest use the A6XX builders. */
tu_cs_emit_regs(cs,
RB_MRT_BUF_INFO(CHIP, 0, .dword = tu_image_view_depth(iview, RB_MRT_BUF_INFO)),
A6XX_RB_MRT_PITCH(0, iview->depth_pitch),
A6XX_RB_MRT_ARRAY_PITCH(0, iview->depth_layer_size),
A6XX_RB_MRT_BASE(0, .qword = iview->depth_base_addr + iview->depth_layer_size * layer),
A6XX_RB_MRT_BASE_GMEM(0),
);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
tu_cs_image_flag_ref(cs, &iview->view, layer);
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->view.ubwc_enabled));
tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP, .flag_mrts = iview->view.ubwc_enabled));
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL());
}
/* Set up MRT 0 as the destination for the stencil data of `iview` at `layer`.
 *
 * Register writes visible here, in order:
 *  - RB_MRT_BUF_INFO(0) from tu_image_view_stencil(iview, RB_MRT_BUF_INFO),
 *    followed by pitch (iview->stencil_pitch), array pitch
 *    (iview->stencil_layer_size), base address
 *    (iview->stencil_base_addr + iview->stencil_layer_size * layer) and
 *    RB_MRT_BASE_GMEM(0) with no fields set;
 *  - RB_RENDER_CNTL with no fields set;
 *  - A7XX_GRAS_SU_RENDER_CNTL with no fields set.
 *
 * Unlike r3d_dst_depth(), no RB_MRT_FLAG_BUFFER write is issued here.
 *
 * NOTE(review): this listing shows both a raw tu_cs_emit_pkt4()/tu_cs_emit()
 * sequence and a tu_cs_emit_regs() sequence for the same registers, and both
 * an A6XX_RB_RENDER_CNTL and a CHIP-variant RB_RENDER_CNTL write. These look
 * like the before/after sides of the change shown together -- confirm which
 * lines are live before relying on this description.
 */
template <chip CHIP>
static void
r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO));
tu_cs_image_stencil_ref(cs, iview, layer);
tu_cs_emit(cs, 0);
/* Builder form: RB_MRT_BUF_INFO is selected by CHIP, the rest use the A6XX builders. */
tu_cs_emit_regs(cs,
RB_MRT_BUF_INFO(CHIP, 0, .dword = tu_image_view_stencil(iview, RB_MRT_BUF_INFO)),
A6XX_RB_MRT_PITCH(0, iview->stencil_pitch),
A6XX_RB_MRT_ARRAY_PITCH(0, iview->stencil_layer_size),
A6XX_RB_MRT_BASE(0, .qword = iview->stencil_base_addr + iview->stencil_layer_size * layer),
A6XX_RB_MRT_BASE_GMEM(0),
);
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP));
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL());
}
template <chip CHIP>
static void
r3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch,
enum pipe_format src_format)
@ -1387,16 +1401,17 @@ r3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t
fixup_dst_format(src_format, &format, &color_fmt);
tu_cs_emit_regs(cs,
A6XX_RB_MRT_BUF_INFO(0, .color_format = color_fmt, .color_swap = fmt.swap),
RB_MRT_BUF_INFO(CHIP, 0, .color_format = color_fmt, .color_swap = fmt.swap),
A6XX_RB_MRT_PITCH(0, pitch),
A6XX_RB_MRT_ARRAY_PITCH(0, 0),
A6XX_RB_MRT_BASE(0, .qword = va),
A6XX_RB_MRT_BASE_GMEM(0, 0));
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP));
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL());
}
template <chip CHIP>
static void
r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
const struct tu_image_view *iview,
@ -1420,7 +1435,7 @@ r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
}
tu_cs_emit_regs(cs,
A6XX_RB_MRT_BUF_INFO(0, .dword = RB_MRT_BUF_INFO),
RB_MRT_BUF_INFO(CHIP, 0, .dword = RB_MRT_BUF_INFO),
A6XX_RB_MRT_PITCH(0, 0),
A6XX_RB_MRT_ARRAY_PITCH(0, 0),
A6XX_RB_MRT_BASE(0, 0),
@ -1431,7 +1446,7 @@ r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
tu_cs_emit_regs(cs,
A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = color_format));
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP));
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL());
}
@ -1663,10 +1678,10 @@ static const struct blit_ops r3d_ops = {
.clear_value = r3d_clear_value,
.src = r3d_src,
.src_buffer = r3d_src_buffer<CHIP>,
.dst = r3d_dst,
.dst_depth = r3d_dst_depth,
.dst_stencil = r3d_dst_stencil,
.dst_buffer = r3d_dst_buffer,
.dst = r3d_dst<CHIP>,
.dst_depth = r3d_dst_depth<CHIP>,
.dst_stencil = r3d_dst_stencil<CHIP>,
.dst_buffer = r3d_dst_buffer<CHIP>,
.setup = r3d_setup<CHIP>,
.run = r3d_run,
.teardown = r3d_teardown<CHIP>,
@ -3666,7 +3681,7 @@ load_3d_blit(struct tu_cmd_buffer *cmd,
tu_create_fdm_bin_patchpoint(cmd, cs, 4, fdm_apply_load_coords, state);
}
r3d_dst_gmem(cmd, cs, iview, att, separate_stencil, i);
r3d_dst_gmem<CHIP>(cmd, cs, iview, att, separate_stencil, i);
if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
if (separate_stencil)
@ -3897,12 +3912,12 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
if (!separate_stencil) {
r3d_dst_depth(cs, iview, layer);
r3d_dst_depth<CHIP>(cs, iview, layer);
} else {
r3d_dst_stencil(cs, iview, layer);
r3d_dst_stencil<CHIP>(cs, iview, layer);
}
} else {
r3d_dst(cs, &iview->view, layer, src_format);
r3d_dst<CHIP>(cs, &iview->view, layer, src_format);
}
r3d_src_gmem<CHIP>(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp);

View file

@ -306,7 +306,7 @@ emit_rb_ccu_cntl(struct tu_cs *cs, struct tu_device *dev, bool gmem)
: dev->physical_device->vpc_attr_buf_size_bypass), );
}
} else {
tu_cs_emit_regs(cs, A6XX_RB_CCU_CNTL(
tu_cs_emit_regs(cs, RB_CCU_CNTL(CHIP,
.gmem_fast_clear_disable =
!dev->physical_device->info->a6xx.has_gmem_fast_clear,
.concurrent_resolve =
@ -381,7 +381,7 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
const uint32_t a = subpass->depth_stencil_attachment.attachment;
if (a == VK_ATTACHMENT_UNUSED) {
tu_cs_emit_regs(cs,
A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE),
RB_DEPTH_BUFFER_INFO(CHIP, .depth_format = DEPTH6_NONE),
A6XX_RB_DEPTH_BUFFER_PITCH(0),
A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(0),
A6XX_RB_DEPTH_BUFFER_BASE(0),
@ -390,7 +390,7 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
tu_cs_emit_regs(cs,
A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));
tu_cs_emit_regs(cs, A6XX_RB_STENCIL_INFO(0));
tu_cs_emit_regs(cs, RB_STENCIL_INFO(CHIP, 0));
return;
}
@ -435,10 +435,11 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
}
} else {
tu_cs_emit_regs(cs,
A6XX_RB_STENCIL_INFO(0));
RB_STENCIL_INFO(CHIP, 0));
}
}
template <chip CHIP>
static void
tu6_emit_mrt(struct tu_cmd_buffer *cmd,
const struct tu_subpass *subpass,
@ -463,9 +464,13 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
* to also be required for alpha-to-coverage which can use the alpha
* value for an otherwise-unused attachment.
*/
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
for (unsigned i = 0; i < 6; i++)
tu_cs_emit(cs, 0);
tu_cs_emit_regs(cs,
RB_MRT_BUF_INFO(CHIP, i),
A6XX_RB_MRT_PITCH(i),
A6XX_RB_MRT_ARRAY_PITCH(i),
A6XX_RB_MRT_BASE(i),
A6XX_RB_MRT_BASE_GMEM(i),
);
tu_cs_emit_regs(cs,
A6XX_SP_FS_MRT_REG(i, .dword = 0));
@ -474,10 +479,15 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
const struct tu_image_view *iview = cmd->state.attachments[a];
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6);
tu_cs_emit(cs, iview->view.RB_MRT_BUF_INFO);
tu_cs_image_ref(cs, &iview->view, 0);
tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a], 0));
tu_cs_emit_regs(cs,
RB_MRT_BUF_INFO(CHIP, i, .dword = iview->view.RB_MRT_BUF_INFO),
A6XX_RB_MRT_PITCH(i, iview->view.pitch),
A6XX_RB_MRT_ARRAY_PITCH(i, iview->view.layer_size),
A6XX_RB_MRT_BASE(i, .qword = tu_layer_address(&iview->view, 0)),
A6XX_RB_MRT_BASE_GMEM(i,
tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a], 0)
),
);
tu_cs_emit_regs(cs,
A6XX_SP_FS_MRT_REG(i, .dword = iview->view.SP_FS_MRT_REG));
@ -621,7 +631,7 @@ tu6_emit_render_cntl<A7XX>(struct tu_cmd_buffer *cmd,
bool binning)
{
tu_cs_emit_regs(
cs, A7XX_RB_RENDER_CNTL(.binning = binning, .raster_mode = TYPE_TILED,
cs, RB_RENDER_CNTL(A7XX, .binning = binning, .raster_mode = TYPE_TILED,
.raster_direction = LR_TB));
tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL(.binning = binning));
}
@ -1271,7 +1281,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
* change per-RP and don't require a WFI to take effect, only CCU inval/flush
* events are required.
*/
tu_cs_emit_regs(cs, A7XX_RB_CCU_CNTL(
tu_cs_emit_regs(cs, RB_CCU_CNTL(CHIP,
.gmem_fast_clear_disable =
!dev->physical_device->info->a6xx.has_gmem_fast_clear,
.concurrent_resolve = dev->physical_device->info->a6xx.concurrent_resolve,
@ -1398,11 +1408,11 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
.bo_offset = gb_offset(bcolor_builtin)));
if (CHIP == A7XX) {
tu_cs_emit_regs(cs, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0(0),
A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1(0x3fe05ff4),
A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2(0x3fa0ebee),
A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3(0x3f5193ed),
A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4(0x3f0243f0), );
tu_cs_emit_regs(cs, TPL1_BICUBIC_WEIGHTS_TABLE_0(CHIP, 0),
TPL1_BICUBIC_WEIGHTS_TABLE_1(CHIP, 0x3fe05ff4),
TPL1_BICUBIC_WEIGHTS_TABLE_2(CHIP, 0x3fa0ebee),
TPL1_BICUBIC_WEIGHTS_TABLE_3(CHIP, 0x3f5193ed),
TPL1_BICUBIC_WEIGHTS_TABLE_4(CHIP, 0x3f0243f0), );
}
if (phys_dev->info->a7xx.cmdbuf_start_a725_quirk) {
@ -4400,7 +4410,7 @@ tu_emit_subpass_begin(struct tu_cmd_buffer *cmd)
tu_emit_subpass_begin_sysmem<CHIP>(cmd);
tu6_emit_zs<CHIP>(cmd, cmd->state.subpass, &cmd->draw_cs);
tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs);
tu6_emit_mrt<CHIP>(cmd, cmd->state.subpass, &cmd->draw_cs);
tu6_emit_render_cntl<CHIP>(cmd, cmd->state.subpass, &cmd->draw_cs, false);
tu_set_input_attachments(cmd, cmd->state.subpass);

View file

@ -2153,7 +2153,7 @@ tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_CNTL_1(
tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL_1(A7XX,
.linearlocalidregid = regid(63, 0),
.threadsize = THREAD128,
.unk11 = true,

View file

@ -1449,7 +1449,7 @@ tu6_emit_cs_config(struct tu_cs *cs,
: (v->local_size[1] % 2 == 0) ? CS_YALIGN_2
: CS_YALIGN_1;
tu_cs_emit_regs(
cs, A7XX_HLSQ_CS_CNTL_1(
cs, HLSQ_CS_CNTL_1(CHIP,
.linearlocalidregid = regid(63, 0), .threadsize = thrsz_cs,
/* A7XX TODO: blob either sets all of these unknowns
* together or doesn't set them at all.
@ -1465,7 +1465,7 @@ tu6_emit_cs_config(struct tu_cs *cs,
A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
tu_cs_emit_regs(cs,
A7XX_SP_CS_CNTL_1(
SP_CS_CNTL_1(CHIP,
.linearlocalidregid = regid(63, 0),
.threadsize = thrsz_cs,
/* A7XX TODO: enable UNK15 when we don't use subgroup ops. */