From 75e0290e4285f31860acc751f4dd6bf9ac858000 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 31 Jul 2024 13:29:11 -0700 Subject: [PATCH] tu: Use CHIP variant reg builders Avoid using the non-variant builders for regs that differ btwn generations. This will become deprecated. Signed-off-by: Rob Clark Part-of: --- src/freedreno/vulkan/tu_clear_blit.cc | 69 ++++++++++++++++----------- src/freedreno/vulkan/tu_cmd_buffer.cc | 48 +++++++++++-------- src/freedreno/vulkan/tu_device.cc | 2 +- src/freedreno/vulkan/tu_shader.cc | 4 +- 4 files changed, 74 insertions(+), 49 deletions(-) diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 26ae67577e5..056d780e65e 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -1320,6 +1320,7 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd, r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); } +template static void r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, enum pipe_format src_format) @@ -1333,10 +1334,14 @@ r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, mrt_buf_info = (mrt_buf_info & ~A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK) | A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(fmt); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); - tu_cs_emit(cs, mrt_buf_info); - tu_cs_image_ref(cs, iview, layer); - tu_cs_emit(cs, 0); + + tu_cs_emit_regs(cs, + RB_MRT_BUF_INFO(CHIP, 0, .dword = mrt_buf_info), + A6XX_RB_MRT_PITCH(0, iview->pitch), + A6XX_RB_MRT_ARRAY_PITCH(0, iview->layer_size), + A6XX_RB_MRT_BASE(0, .qword = tu_layer_address(iview, layer)), + A6XX_RB_MRT_BASE_GMEM(0), + ); tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3); tu_cs_image_flag_ref(cs, iview, layer); @@ -1346,37 +1351,46 @@ r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, */ tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = fmt)); - tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled)); + tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP, .flag_mrts = iview->ubwc_enabled)); tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } +template static void r3d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) { - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); - tu_cs_emit(cs, tu_image_view_depth(iview, RB_MRT_BUF_INFO)); - tu_cs_image_depth_ref(cs, iview, layer); - tu_cs_emit(cs, 0); + tu_cs_emit_regs(cs, + RB_MRT_BUF_INFO(CHIP, 0, .dword = tu_image_view_depth(iview, RB_MRT_BUF_INFO)), + A6XX_RB_MRT_PITCH(0, iview->depth_pitch), + A6XX_RB_MRT_ARRAY_PITCH(0, iview->depth_layer_size), + A6XX_RB_MRT_BASE(0, .qword = iview->depth_base_addr + iview->depth_layer_size * layer), + A6XX_RB_MRT_BASE_GMEM(0), + ); tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3); tu_cs_image_flag_ref(cs, &iview->view, layer); - tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->view.ubwc_enabled)); + tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP, .flag_mrts = iview->view.ubwc_enabled)); tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } +template static void r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) { - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); - tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO)); - tu_cs_image_stencil_ref(cs, iview, layer); - tu_cs_emit(cs, 0); + tu_cs_emit_regs(cs, + RB_MRT_BUF_INFO(CHIP, 0, .dword = tu_image_view_stencil(iview, RB_MRT_BUF_INFO)), + A6XX_RB_MRT_PITCH(0, iview->stencil_pitch), + A6XX_RB_MRT_ARRAY_PITCH(0, iview->stencil_layer_size), + A6XX_RB_MRT_BASE(0, .qword = iview->stencil_base_addr + iview->stencil_layer_size * layer), + A6XX_RB_MRT_BASE_GMEM(0), + ); - tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); + tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP)); tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } +template static void r3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch, enum pipe_format src_format) @@ -1387,16 +1401,17 @@ r3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t fixup_dst_format(src_format, &format, &color_fmt); tu_cs_emit_regs(cs, - A6XX_RB_MRT_BUF_INFO(0, .color_format = color_fmt, .color_swap = fmt.swap), + RB_MRT_BUF_INFO(CHIP, 0, .color_format = color_fmt, .color_swap = fmt.swap), A6XX_RB_MRT_PITCH(0, pitch), A6XX_RB_MRT_ARRAY_PITCH(0, 0), A6XX_RB_MRT_BASE(0, .qword = va), A6XX_RB_MRT_BASE_GMEM(0, 0)); - tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); + tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP)); tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } +template static void r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs, const struct tu_image_view *iview, @@ -1420,7 +1435,7 @@ r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs, } tu_cs_emit_regs(cs, - A6XX_RB_MRT_BUF_INFO(0, .dword = RB_MRT_BUF_INFO), + RB_MRT_BUF_INFO(CHIP, 0, .dword = RB_MRT_BUF_INFO), A6XX_RB_MRT_PITCH(0, 0), A6XX_RB_MRT_ARRAY_PITCH(0, 0), A6XX_RB_MRT_BASE(0, 0), @@ -1431,7 +1446,7 @@ r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs, tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = color_format)); - tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); + tu_cs_emit_regs(cs, RB_RENDER_CNTL(CHIP)); tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL()); } @@ -1663,10 +1678,10 @@ static const struct blit_ops r3d_ops = { .clear_value = r3d_clear_value, .src = r3d_src, .src_buffer = r3d_src_buffer, - .dst = r3d_dst, - .dst_depth = r3d_dst_depth, - .dst_stencil = r3d_dst_stencil, - .dst_buffer = r3d_dst_buffer, + .dst = r3d_dst, + .dst_depth = r3d_dst_depth, + .dst_stencil = r3d_dst_stencil, + .dst_buffer = r3d_dst_buffer, .setup = r3d_setup, .run = r3d_run, .teardown = r3d_teardown, @@ -3666,7 +3681,7 @@ load_3d_blit(struct tu_cmd_buffer *cmd, tu_create_fdm_bin_patchpoint(cmd, cs, 4, fdm_apply_load_coords, state); } - r3d_dst_gmem(cmd, cs, iview, att, separate_stencil, i); + r3d_dst_gmem(cmd, cs, iview, att, separate_stencil, i); if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (separate_stencil) @@ -3897,12 +3912,12 @@ store_3d_blit(struct tu_cmd_buffer *cmd, if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (!separate_stencil) { - r3d_dst_depth(cs, iview, layer); + r3d_dst_depth(cs, iview, layer); } else { - r3d_dst_stencil(cs, iview, layer); + r3d_dst_stencil(cs, iview, layer); } } else { - r3d_dst(cs, &iview->view, layer, src_format); + r3d_dst(cs, &iview->view, layer, src_format); } r3d_src_gmem(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 4fd5f174e2a..50c4d3fa03d 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -306,7 +306,7 @@ emit_rb_ccu_cntl(struct tu_cs *cs, struct tu_device *dev, bool gmem) : dev->physical_device->vpc_attr_buf_size_bypass), ); } } else { - tu_cs_emit_regs(cs, A6XX_RB_CCU_CNTL( + tu_cs_emit_regs(cs, RB_CCU_CNTL(CHIP, .gmem_fast_clear_disable = !dev->physical_device->info->a6xx.has_gmem_fast_clear, .concurrent_resolve = @@ -381,7 +381,7 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, const uint32_t a = subpass->depth_stencil_attachment.attachment; if (a == VK_ATTACHMENT_UNUSED) { tu_cs_emit_regs(cs, - A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE), + RB_DEPTH_BUFFER_INFO(CHIP, .depth_format = DEPTH6_NONE), A6XX_RB_DEPTH_BUFFER_PITCH(0), A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(0), A6XX_RB_DEPTH_BUFFER_BASE(0), @@ -390,7 +390,7 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE)); - tu_cs_emit_regs(cs, A6XX_RB_STENCIL_INFO(0)); + tu_cs_emit_regs(cs, RB_STENCIL_INFO(CHIP, 0)); return; } @@ -435,10 +435,11 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, } } else { tu_cs_emit_regs(cs, - A6XX_RB_STENCIL_INFO(0)); + RB_STENCIL_INFO(CHIP, 0)); } } +template static void tu6_emit_mrt(struct tu_cmd_buffer *cmd, const struct tu_subpass *subpass, @@ -463,9 +464,13 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, * to also be required for alpha-to-coverage which can use the alpha * value for an otherwise-unused attachment. */ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6); - for (unsigned i = 0; i < 6; i++) - tu_cs_emit(cs, 0); + tu_cs_emit_regs(cs, + RB_MRT_BUF_INFO(CHIP, i), + A6XX_RB_MRT_PITCH(i), + A6XX_RB_MRT_ARRAY_PITCH(i), + A6XX_RB_MRT_BASE(i), + A6XX_RB_MRT_BASE_GMEM(i), + ); tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(i, .dword = 0)); @@ -474,10 +479,15 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, const struct tu_image_view *iview = cmd->state.attachments[a]; - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6); - tu_cs_emit(cs, iview->view.RB_MRT_BUF_INFO); - tu_cs_image_ref(cs, &iview->view, 0); - tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a], 0)); + tu_cs_emit_regs(cs, + RB_MRT_BUF_INFO(CHIP, i, .dword = iview->view.RB_MRT_BUF_INFO), + A6XX_RB_MRT_PITCH(i, iview->view.pitch), + A6XX_RB_MRT_ARRAY_PITCH(i, iview->view.layer_size), + A6XX_RB_MRT_BASE(i, .qword = tu_layer_address(&iview->view, 0)), + A6XX_RB_MRT_BASE_GMEM(i, + tu_attachment_gmem_offset(cmd, &cmd->state.pass->attachments[a], 0) + ), + ); tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(i, .dword = iview->view.SP_FS_MRT_REG)); @@ -621,7 +631,7 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd, bool binning) { tu_cs_emit_regs( - cs, A7XX_RB_RENDER_CNTL(.binning = binning, .raster_mode = TYPE_TILED, + cs, RB_RENDER_CNTL(A7XX, .binning = binning, .raster_mode = TYPE_TILED, .raster_direction = LR_TB)); tu_cs_emit_regs(cs, A7XX_GRAS_SU_RENDER_CNTL(.binning = binning)); } @@ -1271,7 +1281,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) * change per-RP and don't require a WFI to take effect, only CCU inval/flush * events are required. */ - tu_cs_emit_regs(cs, A7XX_RB_CCU_CNTL( + tu_cs_emit_regs(cs, RB_CCU_CNTL(CHIP, .gmem_fast_clear_disable = !dev->physical_device->info->a6xx.has_gmem_fast_clear, .concurrent_resolve = dev->physical_device->info->a6xx.concurrent_resolve, @@ -1398,11 +1408,11 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) .bo_offset = gb_offset(bcolor_builtin))); if (CHIP == A7XX) { - tu_cs_emit_regs(cs, A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_0(0), - A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_1(0x3fe05ff4), - A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_2(0x3fa0ebee), - A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_3(0x3f5193ed), - A6XX_TPL1_BICUBIC_WEIGHTS_TABLE_4(0x3f0243f0), ); + tu_cs_emit_regs(cs, TPL1_BICUBIC_WEIGHTS_TABLE_0(CHIP, 0), + TPL1_BICUBIC_WEIGHTS_TABLE_1(CHIP, 0x3fe05ff4), + TPL1_BICUBIC_WEIGHTS_TABLE_2(CHIP, 0x3fa0ebee), + TPL1_BICUBIC_WEIGHTS_TABLE_3(CHIP, 0x3f5193ed), + TPL1_BICUBIC_WEIGHTS_TABLE_4(CHIP, 0x3f0243f0), ); } if (phys_dev->info->a7xx.cmdbuf_start_a725_quirk) { @@ -4400,7 +4410,7 @@ tu_emit_subpass_begin(struct tu_cmd_buffer *cmd) tu_emit_subpass_begin_sysmem(cmd); tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs); - tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs); + tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs); tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false); tu_set_input_attachments(cmd, cmd->state.subpass); diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 2badbb6ff55..82502c0ed08 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -2153,7 +2153,7 @@ tu_init_cmdbuf_start_a725_quirk(struct tu_device *device) tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1)); tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0)); tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0)); - tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_CNTL_1( + tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL_1(A7XX, .linearlocalidregid = regid(63, 0), .threadsize = THREAD128, .unk11 = true, diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 00354e9930a..69bc6f13467 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -1449,7 +1449,7 @@ tu6_emit_cs_config(struct tu_cs *cs, : (v->local_size[1] % 2 == 0) ? CS_YALIGN_2 : CS_YALIGN_1; tu_cs_emit_regs( - cs, A7XX_HLSQ_CS_CNTL_1( + cs, HLSQ_CS_CNTL_1(CHIP, .linearlocalidregid = regid(63, 0), .threadsize = thrsz_cs, /* A7XX TODO: blob either sets all of these unknowns * together or doesn't set them at all. @@ -1465,7 +1465,7 @@ tu6_emit_cs_config(struct tu_cs *cs, A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); tu_cs_emit_regs(cs, - A7XX_SP_CS_CNTL_1( + SP_CS_CNTL_1(CHIP, .linearlocalidregid = regid(63, 0), .threadsize = thrsz_cs, /* A7XX TODO: enable UNK15 when we don't use subgroup ops. */