diff --git a/src/freedreno/common/freedreno_stompable_regs.h b/src/freedreno/common/freedreno_stompable_regs.h index 32d34956cd0..7923e9f5f96 100644 --- a/src/freedreno/common/freedreno_stompable_regs.h +++ b/src/freedreno/common/freedreno_stompable_regs.h @@ -8,6 +8,8 @@ #include +#include "util/macros.h" + #include "fd6_hw.h" /* In order to debug issues with usage of stale reg data we need to have @@ -26,6 +28,9 @@ static inline bool fd_reg_stomp_allowed(chip CHIP, uint16_t reg) { + PRAGMA_DIAGNOSTIC_PUSH + PRAGMA_DIAGNOSTIC_IGNORED_CLANG(-W#pragma-messages) + switch (CHIP) { case A6XX: { switch (reg) { @@ -77,6 +82,7 @@ fd_reg_stomp_allowed(chip CHIP, uint16_t reg) default: { UNREACHABLE("Unknown GPU"); } + PRAGMA_DIAGNOSTIC_POP } return true; diff --git a/src/freedreno/registers/adreno/adreno_pm4.xml b/src/freedreno/registers/adreno/adreno_pm4.xml index 0f4556fb986..15b764f113c 100644 --- a/src/freedreno/registers/adreno/adreno_pm4.xml +++ b/src/freedreno/registers/adreno/adreno_pm4.xml @@ -1095,7 +1095,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) - + diff --git a/src/freedreno/registers/gen_header.py b/src/freedreno/registers/gen_header.py index d6aebdba6e6..f1ed5ef4c7e 100644 --- a/src/freedreno/registers/gen_header.py +++ b/src/freedreno/registers/gen_header.py @@ -121,6 +121,10 @@ def tab_to(name, value): tab_count = 1 print(name + ('\t' * tab_count) + value) +def define_macro(name, value, has_variants): + if has_variants: + value = "__FD_DEPRECATED " + value + tab_to(name, value) def mask(low, high): return ((0xffffffffffffffff >> (64 - (high + 1 - low))) << low) @@ -258,11 +262,11 @@ class Bitset(object): # Requires using `fui()` or `_mesa_float_to_half()` constexpr_mark = "" if reg.bit_size == 64: - tab_to(" uint64_t", "unknown;") tab_to(" uint64_t", "qword;") + tab_to(" uint64_t", "unknown;") else: - tab_to(" uint32_t", "unknown;") tab_to(" uint32_t", "dword;") + tab_to(" uint32_t", "unknown;") print("};\n") if not has_variants: @@ -407,11 +411,13 @@ 
class Array(object): print("\t\tdefault: return INVALID_IDX(idx);") print("\t}\n}") if proto == '': - tab_to("#define REG_%s_%s" % - (self.domain, self.name), "0x%08x\n" % array_offset) + define_macro("#define REG_%s_%s" % + (self.domain, self.name), "0x%08x\n" % array_offset, + has_variants) else: - tab_to("#define REG_%s_%s(%s)" % (self.domain, self.name, - proto), "(0x%08x + %s )\n" % (array_offset, strides)) + define_macro("#define REG_%s_%s(%s)" % (self.domain, self.name, + proto), "(0x%08x + %s )\n" % (array_offset, strides), + has_variants) def dump_pack_struct(self, has_variants): pass @@ -466,10 +472,13 @@ class Reg(object): strides = indices_strides(self.indices()) offset = self.total_offset() if proto == '': - tab_to("#define REG_%s" % self.full_name, "0x%08x" % offset) + define_macro("#define REG_%s" % self.full_name, "0x%08x" % offset, has_variants) elif not has_variants: - print("static CONSTEXPR inline uint32_t REG_%s(%s) { return 0x%08x + %s; }" % ( - self.full_name, proto, offset, strides)) + depcrstr = "" + if has_variants: + depcrstr = " __FD_DEPRECATED " + print("static CONSTEXPR inline%s uint32_t REG_%s(%s) { return 0x%08x + %s; }" % ( + depcrstr, self.full_name, proto, offset, strides)) if self.bitset.inline: self.bitset.dump(has_variants, self.full_name, self) @@ -980,6 +989,15 @@ def dump_c(args, guard, func): print("#endif") print() + # TODO figure out what to do about fd_reg_stomp_allowed() + # vs gcc.. 
for now only enable the warnings with clang: + print("#if defined(__clang__) && !defined(FD_NO_DEPRECATED_PACK)") + print("#define __FD_DEPRECATED _Pragma (\"GCC warning \\\"Deprecated reg builder\\\"\")") + print("#else") + print("#define __FD_DEPRECATED") + print("#endif") + print() + func(p) print("#endif /* %s */" % guard) diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 4b02ac24982..5ba807930ef 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -484,21 +484,25 @@ r2d_setup_common(struct tu_cmd_buffer *cmd, tu_cs_emit_pkt4(cs, REG_A6XX_RB_A2D_PIXEL_CNTL, 1); tu_cs_emit(cs, unknown_8c01); // TODO: seem to be always 0 on A7XX - uint32_t blit_cntl = A6XX_RB_A2D_BLT_CNTL( - .rotate = (enum a6xx_rotation) blit_param, - .solid_color = clear, - .color_format = fmt, - .scissor = scissor, - .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, - .mask = 0xf, - .ifmt = util_format_is_srgb(dst_format) ? R2D_UNORM8_SRGB : ifmt, - ).value; + tu_cs_emit_regs(cs, A6XX_RB_A2D_BLT_CNTL( + .rotate = (enum a6xx_rotation) blit_param, + .solid_color = clear, + .color_format = fmt, + .scissor = scissor, + .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, + .mask = 0xf, + .ifmt = util_format_is_srgb(dst_format) ? R2D_UNORM8_SRGB : ifmt, + )); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_A2D_BLT_CNTL, 1); - tu_cs_emit(cs, blit_cntl); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_A2D_BLT_CNTL, 1); - tu_cs_emit(cs, blit_cntl); + tu_cs_emit_regs(cs, GRAS_A2D_BLT_CNTL(CHIP, + .rotate = (enum a6xx_rotation) blit_param, + .solid_color = clear, + .color_format = fmt, + .scissor = scissor, + .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, + .mask = 0xf, + .ifmt = util_format_is_srgb(dst_format) ? 
R2D_UNORM8_SRGB : ifmt, + )); if (CHIP > A6XX) { tu_cs_emit_regs(cs, TPL1_A2D_BLT_CNTL(CHIP, .raw_copy = false, @@ -871,16 +875,12 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, enum r3d_type type, .cs_bindless = CHIP == A6XX ? 0x1f : 0xff, .gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,)); - tu_crb crb = cs->crb(2 * 5 + 2 * 11); - tu6_emit_xs_config(crb, MESA_SHADER_VERTEX, vs); - tu6_emit_xs_config(crb, MESA_SHADER_TESS_CTRL, NULL); - tu6_emit_xs_config(crb, MESA_SHADER_TESS_EVAL, NULL); - tu6_emit_xs_config(crb, MESA_SHADER_GEOMETRY, NULL); - tu6_emit_xs_config(crb, MESA_SHADER_FRAGMENT, fs); - struct tu_pvtmem_config pvtmem = {}; - tu6_emit_xs(crb, cs->device, MESA_SHADER_VERTEX, vs, &pvtmem, vs_iova); - tu6_emit_xs(crb, cs->device, MESA_SHADER_FRAGMENT, fs, &pvtmem, fs_iova); - crb.flush(); + with_crb (cs, 2 * 5 + 2 * 11) { + tu6_emit_xs_config(crb, { .vs = vs, .fs = fs }); + struct tu_pvtmem_config pvtmem = {}; + tu6_emit_xs(crb, cs->device, MESA_SHADER_VERTEX, vs, &pvtmem, vs_iova); + tu6_emit_xs(crb, cs->device, MESA_SHADER_FRAGMENT, fs, &pvtmem, fs_iova); + } tu6_emit_xs_constants(cs, MESA_SHADER_VERTEX, vs, vs_iova); tu6_emit_xs_constants(cs, MESA_SHADER_FRAGMENT, fs, fs_iova); @@ -5311,12 +5311,12 @@ store_3d_blit(struct tu_cmd_buffer *cmd, * save/restore them dynamically. */ tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1); - tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A6XX_RB_CNTL) | + tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(RB_CNTL(CHIP).reg) | CP_REG_TO_SCRATCH_0_SCRATCH(0) | CP_REG_TO_SCRATCH_0_CNT(1 - 1)); if (CHIP >= A7XX) { tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1); - tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A7XX_RB_BUFFER_CNTL) | + tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(RB_BUFFER_CNTL(CHIP).reg) | CP_REG_TO_SCRATCH_0_SCRATCH(1) | CP_REG_TO_SCRATCH_0_CNT(1 - 1)); } @@ -5357,18 +5357,18 @@ store_3d_blit(struct tu_cmd_buffer *cmd, /* Restore RB_CNTL/GRAS_SC_BIN_CNTL saved above. 
*/ tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); - tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_RB_CNTL) | + tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(RB_CNTL(CHIP).reg) | CP_SCRATCH_TO_REG_0_SCRATCH(0) | CP_SCRATCH_TO_REG_0_CNT(1 - 1)); tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); - tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_GRAS_SC_BIN_CNTL) | + tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(GRAS_SC_BIN_CNTL(CHIP).reg) | CP_SCRATCH_TO_REG_0_SCRATCH(0) | CP_SCRATCH_TO_REG_0_CNT(1 - 1)); if (CHIP >= A7XX) { tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); - tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A7XX_RB_BUFFER_CNTL) | + tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(RB_BUFFER_CNTL(CHIP).reg) | CP_SCRATCH_TO_REG_0_SCRATCH(1) | CP_SCRATCH_TO_REG_0_CNT(1 - 1)); } diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 27cb6023c5c..e734241aeaa 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -671,17 +671,32 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, &cmd->state.pass->attachments[a]; enum a6xx_depth_format fmt = tu6_pipe2depth(attachment->format); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6); - tu_cs_emit(cs, RB_DEPTH_BUFFER_INFO(CHIP, - .depth_format = fmt, - .tilemode = TILE6_3, - .losslesscompen = iview->view.ubwc_enabled, - ).value); - if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) - tu_cs_image_depth_ref(cs, iview, 0); - else - tu_cs_image_ref(cs, &iview->view, 0); - tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0)); + unsigned depth_pitch, depth_array_pitch; + uint64_t depth_base; + + if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + depth_pitch = iview->depth_pitch; + depth_array_pitch = iview->depth_layer_size; + depth_base = iview->depth_base_addr; + } else { + depth_pitch = iview->view.pitch; + depth_array_pitch = iview->view.layer_size; + depth_base = tu_layer_address(&iview->view, 0); + } + + tu_cs_emit_regs(cs, + RB_DEPTH_BUFFER_INFO(CHIP, + 
.depth_format = fmt, + .tilemode = TILE6_3, + .losslesscompen = iview->view.ubwc_enabled, + ), + A6XX_RB_DEPTH_BUFFER_PITCH(depth_pitch), + A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(depth_array_pitch), + A6XX_RB_DEPTH_BUFFER_BASE(depth_base), + A6XX_RB_DEPTH_GMEM_BASE( + tu_attachment_gmem_offset(cmd, attachment, 0) + ), + ); tu_cs_emit_regs(cs, GRAS_SU_DEPTH_BUFFER_INFO(CHIP, .depth_format = fmt)); @@ -691,18 +706,31 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd, if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT || attachment->format == VK_FORMAT_S8_UINT) { - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_BUFFER_INFO, 6); - tu_cs_emit(cs, RB_STENCIL_BUFFER_INFO(CHIP, - .separate_stencil = true, - .tilemode = TILE6_3, - ).value); + unsigned stencil_pitch, stencil_array_pitch, stencil_gmem_offset; + uint64_t stencil_base; + if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - tu_cs_image_stencil_ref(cs, iview, 0); - tu_cs_emit(cs, tu_attachment_gmem_offset_stencil(cmd, attachment, 0)); + stencil_pitch = iview->stencil_pitch; + stencil_array_pitch = iview->stencil_layer_size; + stencil_base = iview->stencil_base_addr; + stencil_gmem_offset = tu_attachment_gmem_offset_stencil(cmd, attachment, 0); } else { - tu_cs_image_ref(cs, &iview->view, 0); - tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0)); + stencil_pitch = iview->view.pitch; + stencil_array_pitch = iview->view.layer_size; + stencil_base = tu_layer_address(&iview->view, 0); + stencil_gmem_offset = tu_attachment_gmem_offset(cmd, attachment, 0); } + + tu_cs_emit_regs(cs, + RB_STENCIL_BUFFER_INFO(CHIP, + .separate_stencil = true, + .tilemode = TILE6_3, + ), + A6XX_RB_STENCIL_BUFFER_PITCH(stencil_pitch), + A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(stencil_array_pitch), + A6XX_RB_STENCIL_BUFFER_BASE(stencil_base), + A6XX_RB_STENCIL_GMEM_BASE(stencil_gmem_offset), + ); } else { tu_cs_emit_regs(cs, RB_STENCIL_BUFFER_INFO(CHIP, 0)); @@ -898,7 +926,7 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd, } if (no_track) { - 
tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CNTL, 1); + tu_cs_emit_pkt4(cs, RB_RENDER_CNTL(A6XX).reg, 1); tu_cs_emit(cs, cntl); return; } @@ -917,7 +945,7 @@ tu6_emit_render_cntl(struct tu_cmd_buffer *cmd, tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3); tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL)); - tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL); + tu_cs_emit(cs, RB_RENDER_CNTL(A6XX).reg); tu_cs_emit(cs, cntl); } @@ -2033,13 +2061,13 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_SP_NC_MODE_CNTL_2, 0); tu_cs_emit_write_reg(cs, REG_A6XX_SP_PERFCTR_SHADER_MASK, 0x3f); if (CHIP == A6XX && !cs->device->physical_device->info->props.is_a702) - tu_cs_emit_write_reg(cs, REG_A6XX_TPL1_UNKNOWN_B605, 0x44); + tu_cs_emit_regs(cs, TPL1_UNKNOWN_B605(CHIP, .dword = 0x44)); if (CHIP == A6XX) { - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80); - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0); + tu_cs_emit_regs(cs, HLSQ_UNKNOWN_BE00(CHIP, .dword = 0x80)); + tu_cs_emit_regs(cs, HLSQ_UNKNOWN_BE01(CHIP)); } - tu_cs_emit_write_reg(cs, REG_A6XX_SP_GFX_USIZE, 0); // 2 on a740 ??? 
+ tu_cs_emit_regs(cs, SP_GFX_USIZE(CHIP)); tu_cs_emit_write_reg(cs, REG_A6XX_TPL1_PS_ROTATION_CNTL, 0); if (CHIP == A6XX) tu_cs_emit_regs(cs, HLSQ_SHARED_CONSTS(CHIP, .enable = false)); @@ -2062,9 +2090,10 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0); + + tu_cs_emit_regs(cs, RB_UNKNOWN_88F0(CHIP)); } - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0); tu_cs_emit_regs(cs, VPC_REPLACE_MODE_CNTL(CHIP, false)); tu_cs_emit_regs(cs, VPC_ROTATION_CNTL(CHIP)); @@ -2078,10 +2107,10 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs) tu_cs_emit_regs(cs, GRAS_SU_CONSERVATIVE_RAS_CNTL(CHIP, 0)); tu_cs_emit_regs(cs, PC_DGEN_SU_CONSERVATIVE_RAS_CNTL(CHIP)); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0); + tu_cs_emit_regs(cs, VPC_UNKNOWN_9210(CHIP)); + tu_cs_emit_regs(cs, VPC_UNKNOWN_9211(CHIP)); } - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_LB_MODE_CNTL, 0); + tu_cs_emit_regs(cs, VPC_LB_MODE_CNTL(CHIP)); tu_cs_emit_regs(cs, PC_CONTEXT_SWITCH_GFX_PREEMPTION_MODE(CHIP)); tu_cs_emit_regs(cs, A6XX_TPL1_MODE_CNTL(.isammode = ISAMMODE_GL, .texcoordroundmode = dev->instance->use_tex_coord_round_nearest_even_mode @@ -2152,9 +2181,10 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs) * zero-instance draw calls. See IR3_CONST_ALLOC_DRIVER_PARAMS allocation * for more info. */ - tu_cs_emit_pkt4( - cs, CHIP == A6XX ? REG_A6XX_SP_VS_CONST_CONFIG : REG_A7XX_SP_VS_CONST_CONFIG, 1); - tu_cs_emit(cs, A6XX_SP_VS_CONST_CONFIG_CONSTLEN(8) | A6XX_SP_VS_CONST_CONFIG_ENABLED); + tu_cs_emit_regs(cs, SP_VS_CONST_CONFIG(CHIP, + .constlen = 8, + .enabled = true, + )); } /* Emit the bin restore preamble, which runs in between bins when L1 @@ -9096,7 +9126,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd, * previous dispatches to finish. 
*/ tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3); - tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_SP_CS_NDRANGE_1)); + tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(SP_CS_NDRANGE_1(CHIP).reg)); tu_cs_emit_qw(cs, info->indirect); tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2); @@ -9121,7 +9151,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd, CP_REG_RMW_0_SKIP_WAIT_FOR_ME | CP_REG_RMW_0_SRC0_IS_REG | CP_REG_RMW_0_SRC1_ADD); - tu_cs_emit(cs, REG_A7XX_SP_CS_NDRANGE_1); /* SRC0 */ + tu_cs_emit(cs, SP_CS_NDRANGE_1(CHIP).reg); /* SRC0 */ tu_cs_emit(cs, -1); /* SRC1 */ /* scratch0 = ((scratch0 & (local_size - 1)) rot 2 @@ -9139,7 +9169,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd, /* write scratch0 to SP_CS_NDRANGE_7 */ tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); tu_cs_emit(cs, - CP_SCRATCH_TO_REG_0_REG(REG_A7XX_SP_CS_NDRANGE_7) | + CP_SCRATCH_TO_REG_0_REG(SP_CS_NDRANGE_7(CHIP).reg) | CP_SCRATCH_TO_REG_0_SCRATCH(0)); tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2); @@ -9157,7 +9187,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd, CP_REG_RMW_0_SKIP_WAIT_FOR_ME | CP_REG_RMW_0_SRC0_IS_REG | CP_REG_RMW_0_SRC1_ADD); - tu_cs_emit(cs, REG_A7XX_SP_CS_NDRANGE_1); /* SRC0 */ + tu_cs_emit(cs, SP_CS_NDRANGE_1(CHIP).reg); /* SRC0 */ tu_cs_emit(cs, local_size[0] - 1); /* SRC1 */ unsigned local_size_log2 = util_logbase2(local_size[0]); @@ -9179,7 +9209,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd, /* write scratch0 to SP_CS_KERNEL_GROUP_X */ tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); tu_cs_emit(cs, - CP_SCRATCH_TO_REG_0_REG(REG_A7XX_SP_CS_KERNEL_GROUP_X) | + CP_SCRATCH_TO_REG_0_REG(SP_CS_KERNEL_GROUP_X(CHIP).reg) | CP_SCRATCH_TO_REG_0_SCRATCH(0)); } else { tu_cs_emit_regs(cs, diff --git a/src/freedreno/vulkan/tu_image.cc b/src/freedreno/vulkan/tu_image.cc index ec18832bdce..d7dcf81367e 100644 --- a/src/freedreno/vulkan/tu_image.cc +++ b/src/freedreno/vulkan/tu_image.cc @@ -147,30 +147,6 @@ tu_layer_flag_address(const struct fdl6_view *iview, uint32_t layer) return iview->ubwc_addr + iview->ubwc_layer_size * layer; } -void 
-tu_cs_image_ref(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer) -{ - tu_cs_emit(cs, A6XX_RB_MRT_PITCH(0, iview->pitch).value); - tu_cs_emit(cs, iview->layer_size >> 6); - tu_cs_emit_qw(cs, tu_layer_address(iview, layer)); -} - -void -tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) -{ - tu_cs_emit(cs, A6XX_RB_STENCIL_BUFFER_PITCH(iview->stencil_pitch).value); - tu_cs_emit(cs, iview->stencil_layer_size >> 6); - tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer); -} - -void -tu_cs_image_depth_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) -{ - tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(iview->depth_pitch).value); - tu_cs_emit(cs, iview->depth_layer_size >> 6); - tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer); -} - template void tu_cs_image_ref_2d(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, bool src) diff --git a/src/freedreno/vulkan/tu_lrz.cc b/src/freedreno/vulkan/tu_lrz.cc index 0402378a25e..e36f7a23cd3 100644 --- a/src/freedreno/vulkan/tu_lrz.cc +++ b/src/freedreno/vulkan/tu_lrz.cc @@ -732,14 +732,14 @@ tu_lrz_before_sysmem_br(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit(cs, if_dwords + 1); /* GRAS_LRZ_DEPTH_CLEAR = lrz_fc->buffer[1].depth_clear_val */ tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3); - tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_GRAS_LRZ_DEPTH_CLEAR)); + tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(GRAS_LRZ_DEPTH_CLEAR(CHIP).reg)); tu_cs_emit_qw(cs, lrz_fc_iova + offsetof(fd_lrzfc_layout, buffer[1].depth_clear_val)); /* } else { */ tu_cs_emit_pkt7(cs, CP_NOP, else_dwords); /* GRAS_LRZ_DEPTH_CLEAR = lrz_fc->buffer[0].depth_clear_val */ tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3); - tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_GRAS_LRZ_DEPTH_CLEAR)); + tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(GRAS_LRZ_DEPTH_CLEAR(CHIP).reg)); tu_cs_emit_qw(cs, lrz_fc_iova + offsetof(fd_lrzfc_layout, 
buffer[0].depth_clear_val)); /* } */ diff --git a/src/freedreno/vulkan/tu_pipeline.cc b/src/freedreno/vulkan/tu_pipeline.cc index a927b52b25a..d5e1acc02d9 100644 --- a/src/freedreno/vulkan/tu_pipeline.cc +++ b/src/freedreno/vulkan/tu_pipeline.cc @@ -338,71 +338,72 @@ tu_push_consts_type(const struct tu_pipeline_layout *layout, } } -template -struct xs_config { - uint16_t reg_sp_xs_config; - uint16_t reg_hlsq_xs_ctrl; -}; +static uint32_t +sp_xs_config(const struct ir3_shader_variant *v) +{ + if (!v) + return 0; -template -static const xs_config xs_configs[] = { - [MESA_SHADER_VERTEX] = { - REG_A6XX_SP_VS_CONFIG, - CHIP == A6XX ? REG_A6XX_SP_VS_CONST_CONFIG : REG_A7XX_SP_VS_CONST_CONFIG, - }, - [MESA_SHADER_TESS_CTRL] = { - REG_A6XX_SP_HS_CONFIG, - CHIP == A6XX ? REG_A6XX_SP_HS_CONST_CONFIG : REG_A7XX_SP_HS_CONST_CONFIG, - }, - [MESA_SHADER_TESS_EVAL] = { - REG_A6XX_SP_DS_CONFIG, - CHIP == A6XX ? REG_A6XX_SP_DS_CONST_CONFIG : REG_A7XX_SP_DS_CONST_CONFIG, - }, - [MESA_SHADER_GEOMETRY] = { - REG_A6XX_SP_GS_CONFIG, - CHIP == A6XX ? REG_A6XX_SP_GS_CONST_CONFIG : REG_A7XX_SP_GS_CONST_CONFIG, - }, - [MESA_SHADER_FRAGMENT] = { - REG_A6XX_SP_PS_CONFIG, - CHIP == A6XX ? REG_A6XX_SP_PS_CONST_CONFIG : REG_A7XX_SP_PS_CONST_CONFIG, - }, - [MESA_SHADER_COMPUTE] = { - REG_A6XX_SP_CS_CONFIG, - CHIP == A6XX ? 
REG_A6XX_SP_CS_CONST_CONFIG : REG_A7XX_SP_CS_CONST_CONFIG, - }, -}; + return A6XX_SP_VS_CONFIG_ENABLED | + COND(v->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) | + COND(v->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) | + COND(v->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_UAV) | + COND(v->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) | + A6XX_SP_VS_CONFIG_NUAV(ir3_shader_num_uavs(v)) | + A6XX_SP_VS_CONFIG_NTEX(v->num_samp) | + A6XX_SP_VS_CONFIG_NSAMP(v->num_samp); +} + +static bool +push_shared_consts(const struct ir3_shader_variant *v) +{ + return v && v->shader_options.push_consts_type == IR3_PUSH_CONSTS_SHARED_PREAMBLE; +} template void -tu6_emit_xs_config(struct tu_crb &crb, - mesa_shader_stage stage, /* xs->type, but xs may be NULL */ - const struct ir3_shader_variant *xs) +tu6_emit_xs_config(struct tu_crb &crb, struct tu_shader_stages stages) { - const struct xs_config *cfg = &xs_configs[stage]; + if (stages.cs) { + crb.add(SP_CS_CONST_CONFIG(CHIP, + .constlen = stages.cs->constlen, + .enabled = true, + .read_imm_shared_consts = push_shared_consts(stages.cs), + )); + crb.add(A6XX_SP_CS_CONFIG(.dword = sp_xs_config(stages.cs))); + } else { + crb.add(SP_VS_CONST_CONFIG(CHIP, + .constlen = COND(stages.vs, stages.vs->constlen), + .enabled = stages.vs, + .read_imm_shared_consts = push_shared_consts(stages.vs), + )); + crb.add(SP_HS_CONST_CONFIG(CHIP, + .constlen = COND(stages.hs, stages.hs->constlen), + .enabled = stages.hs, + .read_imm_shared_consts = push_shared_consts(stages.hs), + )); + crb.add(SP_DS_CONST_CONFIG(CHIP, + .constlen = COND(stages.ds, stages.ds->constlen), + .enabled = stages.ds, + .read_imm_shared_consts = push_shared_consts(stages.ds), + )); + crb.add(SP_GS_CONST_CONFIG(CHIP, + .constlen = COND(stages.gs, stages.gs->constlen), + .enabled = stages.gs, + .read_imm_shared_consts = push_shared_consts(stages.gs), + )); + crb.add(SP_PS_CONST_CONFIG(CHIP, + .constlen = COND(stages.fs, stages.fs->constlen), + .enabled = stages.fs, + 
.read_imm_shared_consts = push_shared_consts(stages.fs), + )); - if (!xs) { - /* shader stage disabled */ - crb.add(tu_reg_value { .reg = cfg->reg_sp_xs_config, .value = 0 }); - crb.add(tu_reg_value { .reg = cfg->reg_hlsq_xs_ctrl, .value = 0 }); - return; + crb.add(A6XX_SP_VS_CONFIG(.dword = sp_xs_config(stages.vs))); + crb.add(A6XX_SP_HS_CONFIG(.dword = sp_xs_config(stages.hs))); + crb.add(A6XX_SP_DS_CONFIG(.dword = sp_xs_config(stages.ds))); + crb.add(A6XX_SP_GS_CONFIG(.dword = sp_xs_config(stages.gs))); + crb.add(A6XX_SP_PS_CONFIG(.dword = sp_xs_config(stages.fs))); } - - crb.add(tu_reg_value { - .reg = cfg->reg_sp_xs_config, - .value = A6XX_SP_VS_CONFIG_ENABLED | - COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) | - COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) | - COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_UAV) | - COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) | - A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) | - A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp) }); - crb.add(tu_reg_value { - .reg = cfg->reg_hlsq_xs_ctrl, - .value = A6XX_SP_VS_CONST_CONFIG_CONSTLEN(xs->constlen) | - A6XX_SP_VS_CONST_CONFIG_ENABLED | - COND(xs->shader_options.push_consts_type == - IR3_PUSH_CONSTS_SHARED_PREAMBLE, - A7XX_SP_VS_CONST_CONFIG_READ_IMM_SHARED_CONSTS) }); } TU_GENX(tu6_emit_xs_config); @@ -782,73 +783,6 @@ tu6_emit_vpc(struct tu_cs *cs, const struct ir3_shader_variant *gs, const struct ir3_shader_variant *fs) { - /* note: doesn't compile as static because of the array regs.. 
*/ - const struct reg_config { - uint16_t reg_sp_xs_out_reg; - uint16_t reg_sp_xs_vpc_dst_reg; - uint16_t reg_vpc_xs_pack; - uint16_t reg_vpc_xs_clip_cntl; - uint16_t reg_vpc_xs_clip_cntl_v2; - uint16_t reg_gras_xs_cl_cntl; - uint16_t reg_pc_xs_out_cntl; - uint16_t reg_sp_xs_primitive_cntl; - uint16_t reg_vpc_xs_layer_cntl; - uint16_t reg_vpc_xs_layer_cntl_v2; - uint16_t reg_gras_xs_layer_cntl; - } reg_config[] = { - [MESA_SHADER_VERTEX] = { - REG_A6XX_SP_VS_OUTPUT_REG(0), - REG_A6XX_SP_VS_VPC_DEST_REG(0), - REG_A6XX_VPC_VS_CNTL, - REG_A6XX_VPC_VS_CLIP_CULL_CNTL, - REG_A6XX_VPC_VS_CLIP_CULL_CNTL_V2, - REG_A6XX_GRAS_CL_VS_CLIP_CULL_DISTANCE, - REG_A6XX_PC_VS_CNTL, - REG_A6XX_SP_VS_OUTPUT_CNTL, - REG_A6XX_VPC_VS_SIV_CNTL, - REG_A6XX_VPC_VS_SIV_CNTL_V2, - REG_A6XX_GRAS_SU_VS_SIV_CNTL, - }, - [MESA_SHADER_TESS_CTRL] = { - 0, - 0, - 0, - 0, - 0, - 0, - REG_A6XX_PC_HS_CNTL, - 0, - 0, - 0 - }, - [MESA_SHADER_TESS_EVAL] = { - REG_A6XX_SP_DS_OUTPUT_REG(0), - REG_A6XX_SP_DS_VPC_DEST_REG(0), - REG_A6XX_VPC_DS_CNTL, - REG_A6XX_VPC_DS_CLIP_CULL_CNTL, - REG_A6XX_VPC_DS_CLIP_CULL_CNTL_V2, - REG_A6XX_GRAS_CL_DS_CLIP_CULL_DISTANCE, - REG_A6XX_PC_DS_CNTL, - REG_A6XX_SP_DS_OUTPUT_CNTL, - REG_A6XX_VPC_DS_SIV_CNTL, - REG_A6XX_VPC_DS_SIV_CNTL_V2, - REG_A6XX_GRAS_SU_DS_SIV_CNTL, - }, - [MESA_SHADER_GEOMETRY] = { - REG_A6XX_SP_GS_OUTPUT_REG(0), - REG_A6XX_SP_GS_VPC_DEST_REG(0), - REG_A6XX_VPC_GS_CNTL, - REG_A6XX_VPC_GS_CLIP_CULL_CNTL, - REG_A6XX_VPC_GS_CLIP_CULL_CNTL_V2, - REG_A6XX_GRAS_CL_GS_CLIP_CULL_DISTANCE, - REG_A6XX_PC_GS_CNTL, - REG_A6XX_SP_GS_OUTPUT_CNTL, - REG_A6XX_VPC_GS_SIV_CNTL, - REG_A6XX_VPC_GS_SIV_CNTL_V2, - REG_A6XX_GRAS_SU_GS_SIV_CNTL, - }, - }; - const struct ir3_shader_variant *last_shader; if (gs) { last_shader = gs; @@ -858,8 +792,6 @@ tu6_emit_vpc(struct tu_cs *cs, last_shader = vs; } - const struct reg_config *cfg = &reg_config[last_shader->type]; - struct ir3_shader_linkage linkage = { .primid_loc = 0xff, .clip0_loc = 0xff, @@ -961,6 +893,8 @@ tu6_emit_vpc(struct
tu_cs *cs, if (linkage.cnt == 0) ir3_link_add(&linkage, 0, 0, 0x1, linkage.max_loc); + tu6_emit_vpc_varying_modes(cs, fs, last_shader); + /* map outputs of the last shader to VPC */ assert(linkage.cnt <= 32); const uint32_t sp_out_count = DIV_ROUND_UP(linkage.cnt, 2); @@ -975,30 +909,121 @@ tu6_emit_vpc(struct tu_cs *cs, A6XX_SP_VS_VPC_DEST_REG_OUTLOC0(linkage.var[i].loc); } - tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count); - tu_cs_emit_array(cs, sp_out, sp_out_count); + tu_crb crb = cs->crb(sp_out_count + sp_vpc_dst_count + 12); + uint32_t *regs; - tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count); - tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count); + switch (last_shader->type) { + case MESA_SHADER_VERTEX: + regs = (uint32_t *)sp_out; + for (unsigned i = 0; i < sp_out_count; i++) + crb.add(A6XX_SP_VS_OUTPUT_REG(i, .dword = regs[i])); - tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1); - tu_cs_emit(cs, A6XX_VPC_VS_CNTL_POSITIONLOC(position_loc) | - A6XX_VPC_VS_CNTL_PSIZELOC(pointsize_loc) | - A6XX_VPC_VS_CNTL_STRIDE_IN_VPC(linkage.max_loc) | - A6XX_VPC_VS_CNTL_EXTRAPOS(extra_pos)); + regs = (uint32_t *)sp_vpc_dst; + for (unsigned i = 0; i < sp_vpc_dst_count; i++) + crb.add(A6XX_SP_VS_VPC_DEST_REG(i, .dword = regs[i])); - tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1); - tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_MASK(clip_cull_mask) | - A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_03_LOC(clip0_loc) | - A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_47_LOC(clip1_loc)); - tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl_v2, 1); - tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_MASK(clip_cull_mask) | - A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_03_LOC(clip0_loc) | - A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_47_LOC(clip1_loc)); + crb.add(VPC_VS_CNTL(CHIP, + .stride_in_vpc = linkage.max_loc, + .positionloc = position_loc, + .psizeloc = pointsize_loc, + .extrapos = extra_pos, + )); - tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1); - tu_cs_emit(cs, 
A6XX_GRAS_CL_VS_CLIP_CULL_DISTANCE_CLIP_MASK(last_shader->clip_mask) | - A6XX_GRAS_CL_VS_CLIP_CULL_DISTANCE_CULL_MASK(last_shader->cull_mask)); + crb.add(VPC_VS_CLIP_CULL_CNTL(CHIP, + .clip_mask = clip_cull_mask, + .clip_dist_03_loc = clip0_loc, + .clip_dist_47_loc = clip1_loc, + )); + + if (CHIP <= A7XX) { + crb.add(VPC_VS_CLIP_CULL_CNTL_V2(CHIP, + .clip_mask = clip_cull_mask, + .clip_dist_03_loc = clip0_loc, + .clip_dist_47_loc = clip1_loc, + )); + } + + crb.add(GRAS_CL_VS_CLIP_CULL_DISTANCE(CHIP, + .clip_mask = last_shader->clip_mask, + .cull_mask = last_shader->cull_mask, + )); + + break; + case MESA_SHADER_TESS_EVAL: + regs = (uint32_t *)sp_out; + for (unsigned i = 0; i < sp_out_count; i++) + crb.add(A6XX_SP_DS_OUTPUT_REG(i, .dword = regs[i])); + + regs = (uint32_t *)sp_vpc_dst; + for (unsigned i = 0; i < sp_vpc_dst_count; i++) + crb.add(A6XX_SP_DS_VPC_DEST_REG(i, .dword = regs[i])); + + crb.add(VPC_DS_CNTL(CHIP, + .stride_in_vpc = linkage.max_loc, + .positionloc = position_loc, + .psizeloc = pointsize_loc, + .extrapos = extra_pos, + )); + + crb.add(VPC_DS_CLIP_CULL_CNTL(CHIP, + .clip_mask = clip_cull_mask, + .clip_dist_03_loc = clip0_loc, + .clip_dist_47_loc = clip1_loc, + )); + + if (CHIP <= A7XX) { + crb.add(VPC_DS_CLIP_CULL_CNTL_V2(CHIP, + .clip_mask = clip_cull_mask, + .clip_dist_03_loc = clip0_loc, + .clip_dist_47_loc = clip1_loc, + )); + } + + crb.add(GRAS_CL_DS_CLIP_CULL_DISTANCE(CHIP, + .clip_mask = last_shader->clip_mask, + .cull_mask = last_shader->cull_mask, + )); + + break; + case MESA_SHADER_GEOMETRY: + regs = (uint32_t *)sp_out; + for (unsigned i = 0; i < sp_out_count; i++) + crb.add(A6XX_SP_GS_OUTPUT_REG(i, .dword = regs[i])); + + regs = (uint32_t *)sp_vpc_dst; + for (unsigned i = 0; i < sp_vpc_dst_count; i++) + crb.add(A6XX_SP_GS_VPC_DEST_REG(i, .dword = regs[i])); + + crb.add(VPC_GS_CNTL(CHIP, + .stride_in_vpc = linkage.max_loc, + .positionloc = position_loc, + .psizeloc = pointsize_loc, + .extrapos = extra_pos, + )); + + 
crb.add(VPC_GS_CLIP_CULL_CNTL(CHIP, + .clip_mask = clip_cull_mask, + .clip_dist_03_loc = clip0_loc, + .clip_dist_47_loc = clip1_loc, + )); + + if (CHIP <= A7XX) { + crb.add(VPC_GS_CLIP_CULL_CNTL_V2(CHIP, + .clip_mask = clip_cull_mask, + .clip_dist_03_loc = clip0_loc, + .clip_dist_47_loc = clip1_loc, + )); + } + + crb.add(GRAS_CL_GS_CLIP_CULL_DISTANCE(CHIP, + .clip_mask = last_shader->clip_mask, + .cull_mask = last_shader->cull_mask, + )); + + break; + default: + UNREACHABLE("bad last_shader type"); + } const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs @@ -1009,18 +1034,50 @@ tu6_emit_vpc(struct tu_cs *cs, bool primid = shader->type != MESA_SHADER_VERTEX && VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID)); + bool last = shader == last_shader; - tu_cs_emit_pkt4(cs, reg_config[shader->type].reg_pc_xs_out_cntl, 1); - if (shader == last_shader) { - tu_cs_emit(cs, A6XX_PC_VS_CNTL_STRIDE_IN_VPC(linkage.max_loc) | - CONDREG(pointsize_regid, A6XX_PC_VS_CNTL_PSIZE) | - CONDREG(layer_regid, A6XX_PC_VS_CNTL_LAYER) | - CONDREG(view_regid, A6XX_PC_VS_CNTL_VIEW) | - COND(primid, A6XX_PC_VS_CNTL_PRIMITIVE_ID) | - A6XX_PC_VS_CNTL_CLIP_MASK(clip_cull_mask) | - CONDREG(shading_rate_regid, A6XX_PC_VS_CNTL_SHADINGRATE)); - } else { - tu_cs_emit(cs, COND(primid, A6XX_PC_VS_CNTL_PRIMITIVE_ID)); + switch (shader->type) { + case MESA_SHADER_VERTEX: + crb.add(PC_VS_CNTL(CHIP, + .stride_in_vpc = COND(last, linkage.max_loc), + .psize = COND(last, VALIDREG(pointsize_regid)), + .layer = COND(last, VALIDREG(layer_regid)), + .view = COND(last, VALIDREG(view_regid)), + .primitive_id = primid, + .clip_mask = COND(last, clip_cull_mask), + .shadingrate = COND(last, VALIDREG(shading_rate_regid)), + )); + break; + case MESA_SHADER_TESS_CTRL: + assert(!last); + crb.add(PC_HS_CNTL(CHIP, + .primitive_id = primid, + )); + break; + case MESA_SHADER_TESS_EVAL: + crb.add(PC_DS_CNTL(CHIP, + .stride_in_vpc = COND(last, linkage.max_loc), + .psize = COND(last,
VALIDREG(pointsize_regid)), + .layer = COND(last, VALIDREG(layer_regid)), + .view = COND(last, VALIDREG(view_regid)), + .primitive_id = primid, + .clip_mask = COND(last, clip_cull_mask), + .shadingrate = COND(last, VALIDREG(shading_rate_regid)), + )); + break; + case MESA_SHADER_GEOMETRY: + crb.add(PC_GS_CNTL(CHIP, + .stride_in_vpc = COND(last, linkage.max_loc), + .psize = COND(last, VALIDREG(pointsize_regid)), + .layer = COND(last, VALIDREG(layer_regid)), + .view = COND(last, VALIDREG(view_regid)), + .primitive_id = primid, + .clip_mask = COND(last, clip_cull_mask), + .shadingrate = COND(last, VALIDREG(shading_rate_regid)), + )); + break; + default: + break; } } @@ -1028,24 +1085,67 @@ tu6_emit_vpc(struct tu_cs *cs, if (gs) assert(flags_regid != INVALID_REG); - tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1); - tu_cs_emit(cs, A6XX_SP_VS_OUTPUT_CNTL_OUT(linkage.cnt) | - A6XX_SP_GS_OUTPUT_CNTL_FLAGS_REGID(flags_regid)); - - tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1); - tu_cs_emit(cs, A6XX_VPC_VS_SIV_CNTL_LAYERLOC(layer_loc) | - A6XX_VPC_VS_SIV_CNTL_VIEWLOC(view_loc) | - A6XX_VPC_VS_SIV_CNTL_SHADINGRATELOC(shading_rate_loc)); - tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl_v2, 1); - tu_cs_emit(cs, A6XX_VPC_VS_SIV_CNTL_LAYERLOC(layer_loc) | - A6XX_VPC_VS_SIV_CNTL_VIEWLOC(view_loc) | - A6XX_VPC_VS_SIV_CNTL_SHADINGRATELOC(shading_rate_loc)); - - tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1); - tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_SU_VS_SIV_CNTL_WRITES_LAYER) | - CONDREG(view_regid, A6XX_GRAS_SU_VS_SIV_CNTL_WRITES_VIEW)); - - tu6_emit_vpc_varying_modes(cs, fs, last_shader); + switch (last_shader->type) { + case MESA_SHADER_VERTEX: + crb.add(A6XX_SP_VS_OUTPUT_CNTL(.out = linkage.cnt)); + crb.add(VPC_VS_SIV_CNTL(CHIP, + .layerloc = layer_loc, + .viewloc = view_loc, + .shadingrateloc = shading_rate_loc, + )); + if (CHIP <= A7XX) { + crb.add(VPC_VS_SIV_CNTL_V2(CHIP, + .layerloc = layer_loc, + .viewloc = view_loc, + .shadingrateloc = 
shading_rate_loc, + )); + } + crb.add(GRAS_SU_VS_SIV_CNTL(CHIP, + .writes_layer = VALIDREG(layer_regid), + .writes_view = VALIDREG(view_regid), + )); + break; + case MESA_SHADER_TESS_EVAL: + crb.add(A6XX_SP_DS_OUTPUT_CNTL(.out = linkage.cnt)); + crb.add(VPC_DS_SIV_CNTL(CHIP, + .layerloc = layer_loc, + .viewloc = view_loc, + .shadingrateloc = shading_rate_loc, + )); + if (CHIP <= A7XX) { + crb.add(VPC_DS_SIV_CNTL_V2(CHIP, + .layerloc = layer_loc, + .viewloc = view_loc, + .shadingrateloc = shading_rate_loc, + )); + } + crb.add(GRAS_SU_DS_SIV_CNTL(CHIP, + .writes_layer = VALIDREG(layer_regid), + .writes_view = VALIDREG(view_regid), + )); + break; + case MESA_SHADER_GEOMETRY: + crb.add(A6XX_SP_GS_OUTPUT_CNTL(.out = linkage.cnt, .flags_regid = flags_regid)); + crb.add(VPC_GS_SIV_CNTL(CHIP, + .layerloc = layer_loc, + .viewloc = view_loc, + .shadingrateloc = shading_rate_loc, + )); + if (CHIP <= A7XX) { + crb.add(VPC_GS_SIV_CNTL_V2(CHIP, + .layerloc = layer_loc, + .viewloc = view_loc, + .shadingrateloc = shading_rate_loc, + )); + } + crb.add(GRAS_SU_GS_SIV_CNTL(CHIP, + .writes_layer = VALIDREG(layer_regid), + .writes_view = VALIDREG(view_regid), + )); + break; + default: + UNREACHABLE("bad last_shader type"); + } } TU_GENX(tu6_emit_vpc); @@ -1159,8 +1259,7 @@ tu6_emit_patch_control_points(struct tu_cs *cs, patch_control_points * vs->variant->output_size / 4; /* Total attribute slots in HS incoming patch. 
*/ - tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_PARAM_1, 1); - tu_cs_emit(cs, patch_local_mem_size_16b); + tu_cs_emit_regs(cs, PC_HS_PARAM_1(CHIP, patch_local_mem_size_16b)); const uint32_t wavesize = 64; const uint32_t vs_hs_local_mem_size = 16384; @@ -1266,11 +1365,14 @@ tu6_emit_program_config(struct tu_cs *cs, .ds_state = true, .gs_state = true, .fs_state = true, .gfx_uav = true, .gfx_shared_const = shared_consts_enable)); - for (size_t stage_idx = MESA_SHADER_VERTEX; - stage_idx <= MESA_SHADER_FRAGMENT; stage_idx++) { - mesa_shader_stage stage = (mesa_shader_stage) stage_idx; - tu6_emit_xs_config(crb, stage, variants[stage]); - } + + const struct ir3_shader_variant *vs = variants[MESA_SHADER_VERTEX]; + const struct ir3_shader_variant *hs = variants[MESA_SHADER_TESS_CTRL]; + const struct ir3_shader_variant *ds = variants[MESA_SHADER_TESS_EVAL]; + const struct ir3_shader_variant *gs = variants[MESA_SHADER_GEOMETRY]; + const struct ir3_shader_variant *fs = variants[MESA_SHADER_FRAGMENT]; + + tu6_emit_xs_config(crb, { .vs = vs, .hs = hs, .ds = ds, .gs = gs, .fs = fs }); crb.flush(); @@ -1280,11 +1382,6 @@ tu6_emit_program_config(struct tu_cs *cs, tu6_emit_dynamic_offset(cs, variants[stage], shaders[stage], prog); } - const struct ir3_shader_variant *vs = variants[MESA_SHADER_VERTEX]; - const struct ir3_shader_variant *hs = variants[MESA_SHADER_TESS_CTRL]; - const struct ir3_shader_variant *ds = variants[MESA_SHADER_TESS_EVAL]; - const struct ir3_shader_variant *gs = variants[MESA_SHADER_GEOMETRY]; - if (hs) { tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER); tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER); @@ -1304,8 +1401,9 @@ tu6_emit_program_config(struct tu_cs *cs, uint32_t vec4_size = gs->gs.vertices_in * DIV_ROUND_UP(prev_stage_output_size, 4); - tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); - tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size)); + tu_cs_emit_regs(cs, PC_PRIMITIVE_CNTL_6(CHIP, + .stride_in_vpc = vec4_size, + )); } uint32_t prim_size = 
prev_stage_output_size; @@ -2866,17 +2964,18 @@ void tu6_emit_sample_locations(struct tu_cs *cs, bool enable, const struct vk_sample_locations_state *samp_loc) { - uint32_t sample_config = - COND(enable, A6XX_RB_MSAA_SAMPLE_POS_CNTL_LOCATION_ENABLE); + tu_cs_emit_regs(cs, GRAS_SC_MSAA_SAMPLE_POS_CNTL(CHIP, + .location_enable = enable, + )); + tu_cs_emit_regs(cs, A6XX_RB_MSAA_SAMPLE_POS_CNTL( + .location_enable = enable, + )); - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_MSAA_SAMPLE_POS_CNTL, 1); - tu_cs_emit(cs, sample_config); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_SAMPLE_POS_CNTL, 1); - tu_cs_emit(cs, sample_config); - - tu_cs_emit_pkt4(cs, REG_A6XX_TPL1_MSAA_SAMPLE_POS_CNTL, 1); - tu_cs_emit(cs, sample_config); + if (CHIP <= A7XX) { + tu_cs_emit_regs(cs, TPL1_MSAA_SAMPLE_POS_CNTL(CHIP, + .location_enable = enable, + )); + } if (!enable) return; @@ -2903,14 +3002,21 @@ tu6_emit_sample_locations(struct tu_cs *cs, bool enable, A6XX_RB_PROGRAMMABLE_MSAA_POS_0_SAMPLE_0_Y(y))) << i*8; } - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_PROGRAMMABLE_MSAA_POS_0, 2); - tu_cs_emit_qw(cs, sample_locations); + tu_cs_emit_regs(cs, + GRAS_SC_PROGRAMMABLE_MSAA_POS_0(CHIP, .dword = sample_locations), + GRAS_SC_PROGRAMMABLE_MSAA_POS_1(CHIP, .dword = sample_locations >> 32), + ); + tu_cs_emit_regs(cs, + A6XX_RB_PROGRAMMABLE_MSAA_POS_0(.dword = sample_locations), + A6XX_RB_PROGRAMMABLE_MSAA_POS_1(.dword = sample_locations >> 32), + ); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_PROGRAMMABLE_MSAA_POS_0, 2); - tu_cs_emit_qw(cs, sample_locations); - - tu_cs_emit_pkt4(cs, REG_A6XX_TPL1_PROGRAMMABLE_MSAA_POS_0, 2); - tu_cs_emit_qw(cs, sample_locations); + if (CHIP <= A7XX) { + tu_cs_emit_regs(cs, + TPL1_PROGRAMMABLE_MSAA_POS_0(CHIP, .dword = sample_locations), + TPL1_PROGRAMMABLE_MSAA_POS_1(CHIP, .dword = sample_locations >> 32), + ); + } } static const enum mesa_vk_dynamic_graphics_state tu_depth_bias_state[] = { diff --git a/src/freedreno/vulkan/tu_pipeline.h b/src/freedreno/vulkan/tu_pipeline.h index 
1fab462cd69..d295e30a1dd 100644 --- a/src/freedreno/vulkan/tu_pipeline.h +++ b/src/freedreno/vulkan/tu_pipeline.h @@ -301,11 +301,14 @@ struct tu_pvtmem_config { bool per_wave; }; +struct tu_shader_stages { + const struct ir3_shader_variant *vs, *hs, *ds, *gs, *fs, *cs; +}; + template void tu6_emit_xs_config(struct tu_crb &crb, - mesa_shader_stage stage, - const struct ir3_shader_variant *xs); + struct tu_shader_stages stages); template void diff --git a/src/freedreno/vulkan/tu_query_pool.cc b/src/freedreno/vulkan/tu_query_pool.cc index 98ca316f0ba..fc535bf37b5 100644 --- a/src/freedreno/vulkan/tu_query_pool.cc +++ b/src/freedreno/vulkan/tu_query_pool.cc @@ -27,7 +27,19 @@ #define NSEC_PER_SEC 1000000000ull #define WAIT_TIMEOUT 5 -#define STAT_COUNT ((REG_A6XX_RBBM_PIPESTAT_CSINVOCATIONS - REG_A6XX_RBBM_PIPESTAT_IAVERTICES) / 2 + 1) +#define __COUNTER_REG(CHIP, name) __RBBM_PIPESTAT_ ## name ({}).reg +#define COUNTER_REG(name) __COUNTER_REG(CHIP, name) + +/* Note: gen8 changes the order of the pipestat regs, but in either case + * the ones we are interested in are consecutive, so for the purposes of + * knowing how many values to read we can just use A6XX reg addresses. + * + * And in both cases, RBBM_PIPESTAT_IAVERTICES is the first one. + * + * Depending on how/if they shuffle around in the future, we might need + * to shift to reading them individually, like gallium does. 
+ */ +#define STAT_COUNT ((__COUNTER_REG(A6XX, CSINVOCATIONS) - __COUNTER_REG(A6XX, IAVERTICES)) / 2 + 1) struct PACKED query_slot { uint64_t available; @@ -463,35 +475,38 @@ get_result_count(struct tu_query_pool *pool) } } +template static uint32_t statistics_index(uint32_t *statistics) { uint32_t stat; stat = u_bit_scan(statistics); +#define COUNTER_OFFSET(name) ((COUNTER_REG(name) - COUNTER_REG(IAVERTICES)) / 2) + switch (1 << stat) { case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT: - return 0; + return COUNTER_OFFSET(IAVERTICES); case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT: - return 1; + return COUNTER_OFFSET(IAPRIMITIVES); case VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT: - return 2; + return COUNTER_OFFSET(VSINVOCATIONS); case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT: - return 5; + return COUNTER_OFFSET(GSINVOCATIONS); case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT: - return 6; + return COUNTER_OFFSET(GSPRIMITIVES); case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT: - return 7; + return COUNTER_OFFSET(CINVOCATIONS); case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT: - return 8; + return COUNTER_OFFSET(CPRIMITIVES); case VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT: - return 9; + return COUNTER_OFFSET(PSINVOCATIONS); case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT: - return 3; + return COUNTER_OFFSET(HSINVOCATIONS); case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT: - return 4; + return COUNTER_OFFSET(DSINVOCATIONS); case VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT: - return 10; + return COUNTER_OFFSET(CSINVOCATIONS); default: return 0; } @@ -588,6 +603,7 @@ write_performance_query_value_cpu(char *base, } } +template static VkResult get_query_pool_results(struct tu_device *device, struct tu_query_pool *pool, @@ -634,7 +650,7 @@ get_query_pool_results(struct tu_device *device, 
uint64_t *result; if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { - uint32_t stat_idx = statistics_index(&statistics); + uint32_t stat_idx = statistics_index(&statistics); result = query_result_addr(pool, query, uint64_t, stat_idx); } else if (is_perf_query_raw(pool)) { result = query_result_addr(pool, query, struct perfcntr_query_slot, k); @@ -703,6 +719,7 @@ get_query_pool_results(struct tu_device *device, return result; } +template VKAPI_ATTR VkResult VKAPI_CALL tu_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, @@ -731,13 +748,14 @@ tu_GetQueryPoolResults(VkDevice _device, case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR: - return get_query_pool_results(device, pool, firstQuery, queryCount, - dataSize, pData, stride, flags); + return get_query_pool_results(device, pool, firstQuery, queryCount, + dataSize, pData, stride, flags); default: assert(!"Invalid query type"); } return VK_SUCCESS; } +TU_GENX(tu_GetQueryPoolResults); /* Copies a query value from one buffer to another from the GPU. 
*/ static void @@ -808,7 +826,7 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf, uint64_t result_iova; if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { - uint32_t stat_idx = statistics_index(&statistics); + uint32_t stat_idx = statistics_index(&statistics); result_iova = query_result_iova(pool, query, uint64_t, stat_idx); } else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { result_iova = query_result_iova(pool, query, @@ -895,6 +913,7 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, } TU_GENX(tu_CmdCopyQueryPoolResults); +template static void emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf, struct tu_query_pool *pool, @@ -915,7 +934,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf, uint64_t result_iova; if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { - uint32_t stat_idx = statistics_index(&statistics); + uint32_t stat_idx = statistics_index(&statistics); result_iova = query_result_iova(pool, query, uint64_t, stat_idx); } else if (is_perf_query_raw(pool)) { result_iova = query_result_iova(pool, query, @@ -949,6 +968,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf, } +template VKAPI_ATTR void VKAPI_CALL tu_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, @@ -969,12 +989,13 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer, case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR: - emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount); + emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount); break; default: assert(!"Invalid query type"); } } +TU_GENX(tu_CmdResetQueryPool); VKAPI_ATTR void VKAPI_CALL tu_ResetQueryPool(VkDevice device, @@ -1147,7 +1168,7 @@ emit_begin_stat_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_wfi(cs); tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); - tu_cs_emit(cs, 
CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) | + tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) | CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) | CP_REG_TO_MEM_0_64B); tu_cs_emit_qw(cs, begin_iova); @@ -1365,7 +1386,7 @@ emit_begin_prim_generated_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_wfi(cs); tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); - tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_CINVOCATIONS) | + tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(CINVOCATIONS)) | CP_REG_TO_MEM_0_CNT(2) | CP_REG_TO_MEM_0_64B); tu_cs_emit_qw(cs, begin_iova); @@ -1633,7 +1654,7 @@ emit_end_stat_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_wfi(cs); tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); - tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) | + tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) | CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) | CP_REG_TO_MEM_0_64B); tu_cs_emit_qw(cs, end_iova); @@ -1918,7 +1939,7 @@ emit_end_prim_generated_query(struct tu_cmd_buffer *cmdbuf, tu_cs_emit_wfi(cs); tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); - tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_CINVOCATIONS) | + tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(CINVOCATIONS)) | CP_REG_TO_MEM_0_CNT(2) | CP_REG_TO_MEM_0_64B); tu_cs_emit_qw(cs, end_iova); @@ -2031,6 +2052,7 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, } TU_GENX(tu_CmdEndQueryIndexedEXT); +template VKAPI_ATTR void VKAPI_CALL tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits2 pipelineStage, @@ -2067,7 +2089,7 @@ tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, } tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); - tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_CP_ALWAYS_ON_COUNTER) | + tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(__CP_ALWAYS_ON_COUNTER({}).reg) | CP_REG_TO_MEM_0_CNT(2) | CP_REG_TO_MEM_0_64B); tu_cs_emit_qw(cs, query_result_iova(pool, query, uint64_t, 0)); @@ -2108,6 +2130,7 @@ tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, */ 
handle_multiview_queries(cmd, pool, query); } +TU_GENX(tu_CmdWriteTimestamp2); VKAPI_ATTR void VKAPI_CALL tu_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer, diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index a6118c93e02..6b60527ae54 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -1781,7 +1781,7 @@ tu6_emit_cs_config(struct tu_cs *cs, crb.add(SP_UPDATE_CNTL(CHIP, .cs_state = true, .cs_uav = true, .cs_shared_const = shared_consts_enable)); - tu6_emit_xs_config(crb, MESA_SHADER_COMPUTE, v); + tu6_emit_xs_config(crb, { .cs = v }); tu6_emit_xs(crb, cs->device, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova); } tu6_emit_xs_constants(cs, MESA_SHADER_COMPUTE, v, binary_iova); @@ -2031,50 +2031,54 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) need_size = true; } - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_INTERP_CNTL, 1); - tu_cs_emit(cs, - CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_GRAS_CL_INTERP_CNTL_IJ_PERSP_PIXEL) | - CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_GRAS_CL_INTERP_CNTL_IJ_PERSP_CENTROID) | - CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_GRAS_CL_INTERP_CNTL_IJ_PERSP_SAMPLE) | - CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_PIXEL) | - CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_CENTROID) | - CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_SAMPLE) | - COND(need_size, A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_PIXEL) | - COND(need_size_persamp, A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_SAMPLE) | - COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CL_INTERP_CNTL_COORD_MASK(fs->fragcoord_compmask))); + tu_cs_emit_regs(cs, + GRAS_CL_INTERP_CNTL(CHIP, + .ij_persp_pixel = VALIDREG(ij_regid[IJ_PERSP_PIXEL]), + .ij_persp_centroid = VALIDREG(ij_regid[IJ_PERSP_CENTROID]), + .ij_persp_sample = VALIDREG(ij_regid[IJ_PERSP_SAMPLE]), + .ij_linear_pixel = VALIDREG(ij_regid[IJ_LINEAR_PIXEL]) || need_size, + 
.ij_linear_centroid = VALIDREG(ij_regid[IJ_LINEAR_CENTROID]), + .ij_linear_sample = VALIDREG(ij_regid[IJ_LINEAR_SAMPLE]) || need_size_persamp, + .coord_mask = fs->fragcoord_compmask, + ) + ); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_INTERP_CNTL, 2); - tu_cs_emit(cs, - CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_RB_INTERP_CNTL_IJ_PERSP_PIXEL) | - CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_RB_INTERP_CNTL_IJ_PERSP_CENTROID) | - CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_RB_INTERP_CNTL_IJ_PERSP_SAMPLE) | - CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_RB_INTERP_CNTL_IJ_LINEAR_PIXEL) | - CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_RB_INTERP_CNTL_IJ_LINEAR_CENTROID) | - CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_RB_INTERP_CNTL_IJ_LINEAR_SAMPLE) | - COND(need_size, A6XX_RB_INTERP_CNTL_IJ_LINEAR_PIXEL) | - COND(enable_varyings, A6XX_RB_INTERP_CNTL_INTERP_EN) | - COND(need_size_persamp, A6XX_RB_INTERP_CNTL_IJ_LINEAR_SAMPLE) | - COND(fs->fragcoord_compmask != 0, - A6XX_RB_INTERP_CNTL_COORD_MASK(fs->fragcoord_compmask))); - tu_cs_emit(cs, - A6XX_RB_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE( - sample_shading ? 
FRAGCOORD_SAMPLE : FRAGCOORD_CENTER) | - CONDREG(smask_in_regid, A6XX_RB_PS_INPUT_CNTL_SAMPLEMASK) | - CONDREG(samp_id_regid, A6XX_RB_PS_INPUT_CNTL_SAMPLEID) | - CONDREG(ij_regid[IJ_PERSP_CENTER_RHW], A6XX_RB_PS_INPUT_CNTL_CENTERRHW) | - COND(fs->frag_face, A6XX_RB_PS_INPUT_CNTL_FACENESS) | - CONDREG(shading_rate_regid, A6XX_RB_PS_INPUT_CNTL_FOVEATION)); + tu_cs_emit_regs(cs, + A6XX_RB_INTERP_CNTL( + .ij_persp_pixel = VALIDREG(ij_regid[IJ_PERSP_PIXEL]), + .ij_persp_centroid = VALIDREG(ij_regid[IJ_PERSP_CENTROID]), + .ij_persp_sample = VALIDREG(ij_regid[IJ_PERSP_SAMPLE]), + .ij_linear_pixel = VALIDREG(ij_regid[IJ_LINEAR_PIXEL]) || need_size, + .ij_linear_centroid = VALIDREG(ij_regid[IJ_LINEAR_CENTROID]), + .ij_linear_sample = VALIDREG(ij_regid[IJ_LINEAR_SAMPLE]) || need_size_persamp, + .coord_mask = fs->fragcoord_compmask, + .interp_en = enable_varyings, + ), + A6XX_RB_PS_INPUT_CNTL( + .samplemask = VALIDREG(smask_in_regid), + .postdepthcoverage = fs->post_depth_coverage, + .faceness = fs->frag_face, + .sampleid = VALIDREG(samp_id_regid), + .fragcoordsamplemode = sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER, + .centerrhw = VALIDREG(ij_regid[IJ_PERSP_CENTER_RHW]), + .foveation = VALIDREG(shading_rate_regid), + ), + ); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_PS_SAMPLEFREQ_CNTL, 1); - tu_cs_emit(cs, COND(sample_shading, A6XX_RB_PS_SAMPLEFREQ_CNTL_PER_SAMP_MODE)); + tu_cs_emit_regs(cs, + A6XX_RB_PS_SAMPLEFREQ_CNTL(sample_shading) + ); - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_INPUT_CNTL, 1); - tu_cs_emit(cs, CONDREG(samp_id_regid, A6XX_GRAS_LRZ_PS_INPUT_CNTL_SAMPLEID) | - A6XX_GRAS_LRZ_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE( - sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER)); + tu_cs_emit_regs(cs, + GRAS_LRZ_PS_INPUT_CNTL(CHIP, + .sampleid = VALIDREG(samp_id_regid), + .fragcoordsamplemode = sample_shading ? 
FRAGCOORD_SAMPLE : FRAGCOORD_CENTER, + ) + ); - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL, 1); - tu_cs_emit(cs, COND(sample_shading, A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL_PER_SAMP_MODE)); + tu_cs_emit_regs(cs, + A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL(sample_shading) + ); uint32_t varmask[4] = { 0 }; @@ -2200,11 +2204,11 @@ tu6_emit_vs(struct tu_cs *cs, bool multi_pos_output = vs->multi_pos_output; uint32_t multiview_views = util_logbase2(view_mask) + 1; - uint32_t multiview_cntl = view_mask ? - A6XX_PC_STEREO_RENDERING_CNTL_ENABLE | - A6XX_PC_STEREO_RENDERING_CNTL_VIEWS(multiview_views) | - COND(!multi_pos_output, A6XX_PC_STEREO_RENDERING_CNTL_DISABLEMULTIPOS) - : 0; + struct fd_reg_pair multiview_cntl = PC_STEREO_RENDERING_CNTL(CHIP, + .enable = view_mask, + .disablemultipos = !multi_pos_output, + .views = multiview_views, + ); /* Copy what the blob does here. This will emit an extra 0x3f * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what @@ -2213,27 +2217,31 @@ tu6_emit_vs(struct tu_cs *cs, if (cs->device->physical_device->info->props.has_cp_reg_write) { tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3); tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE)); - tu_cs_emit(cs, REG_A6XX_PC_STEREO_RENDERING_CNTL); + tu_cs_emit(cs, multiview_cntl.reg); } else { - tu_cs_emit_pkt4(cs, REG_A6XX_PC_STEREO_RENDERING_CNTL, 1); + tu_cs_emit_pkt4(cs, multiview_cntl.reg, 1); } - tu_cs_emit(cs, multiview_cntl); + tu_cs_emit(cs, multiview_cntl.value); - tu_cs_emit_pkt4(cs, REG_A6XX_VFD_STEREO_RENDERING_CNTL, 1); - tu_cs_emit(cs, multiview_cntl); + tu_cs_emit_regs(cs, A6XX_VFD_STEREO_RENDERING_CNTL( + .enable = view_mask, + .disablemultipos = !multi_pos_output, + .views = multiview_views, + )); - if (multiview_cntl && + if (view_mask && cs->device->physical_device->info->props.supports_multiview_mask) { - tu_cs_emit_pkt4(cs, REG_A6XX_PC_STEREO_RENDERING_VIEWMASK, 1); - tu_cs_emit(cs, view_mask); + tu_cs_emit_regs(cs, PC_STEREO_RENDERING_VIEWMASK(CHIP, view_mask)); } 
if (CHIP >= A7XX) { - tu_cs_emit_pkt4(cs, REG_A7XX_VPC_STEREO_RENDERING_CNTL, 1); - tu_cs_emit(cs, multiview_cntl); + tu_cs_emit_regs(cs, VPC_STEREO_RENDERING_CNTL(CHIP, + .enable = view_mask, + .disablemultipos = !multi_pos_output, + .views = multiview_views, + )); - tu_cs_emit_pkt4(cs, REG_A7XX_VPC_STEREO_RENDERING_VIEWMASK, 1); - tu_cs_emit(cs, view_mask); + tu_cs_emit_regs(cs, VPC_STEREO_RENDERING_VIEWMASK(CHIP, view_mask)); } tu6_emit_vfd_dest(cs, vs); @@ -2276,8 +2284,7 @@ tu6_emit_hs(struct tu_cs *cs, A6XX_VFD_CNTL_2_REGID_INVOCATIONID(hs_invocation_regid)); if (hs) { - tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_PARAM_0, 1); - tu_cs_emit(cs, hs->tess.tcs_vertices_out); + tu_cs_emit_regs(cs, PC_HS_PARAM_0(CHIP, hs->tess.tcs_vertices_out)); } } TU_GENX(tu6_emit_hs); @@ -2524,7 +2531,7 @@ tu_upload_shader(struct tu_device *dev, size += TU6_EMIT_VFD_DEST_MAX_DWORDS; const unsigned xs_size = 128; - const unsigned vpc_size = 32 + (v->stream_output.num_outputs != 0 ? 256 : 0); + const unsigned vpc_size = 64 + (v->stream_output.num_outputs != 0 ? 
256 : 0); for (auto& variant : {v, binning, safe_const, safe_const_binning}) { if (variant) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc index dc478ae0eeb..a3eb2c3cb8a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc @@ -3,8 +3,6 @@ * SPDX-License-Identifier: MIT */ -#define FD_BO_NO_HARDPIN 1 - #include "freedreno_batch.h" #include "fd6_barrier.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.cc b/src/gallium/drivers/freedreno/a6xx/fd6_blend.cc index 769f802baeb..3cde4a02c1b 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "pipe/p_state.h" #include "util/u_blend.h" #include "util/u_dual_blend.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc index da1e28580fb..5ace6ea1b45 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "util/format_srgb.h" #include "util/half_float.h" #include "util/u_dump.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc index 314db5ef580..e1731555120 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc @@ -7,7 +7,6 @@ */ #include "drm/freedreno_ringbuffer.h" -#define FD_BO_NO_HARDPIN 1 #include "pipe/p_state.h" #include "util/u_dump.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc index b2d94a1cf91..b91123a35a5 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.cc @@ -4,8 +4,6 @@ * 
SPDX-License-Identifier: MIT */ -#define FD_BO_NO_HARDPIN 1 - #include "fd6_barrier.h" #include "fd6_const.h" #include "fd6_compute.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc index 0211f5385e2..f47f7438948 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "freedreno_query_acc.h" #include "freedreno_state.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc index 00232bad861..64e508acb97 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "pipe/p_state.h" #include "util/u_memory.h" #include "util/u_prim.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc index 48d02729550..c1bb053392d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "pipe/p_state.h" #include "util/format/u_format.h" #include "util/u_helpers.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 14e61527903..4e5256c270d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -220,13 +220,17 @@ __event_write(fd_cs &cs, enum fd_gpu_event event, fd_pkt7 pkt(cs, CP_EVENT_WRITE, len); if (CHIP == A6XX) { - pkt.add(CP_EVENT_WRITE_0_EVENT(info.raw_event) | - COND(info.needs_seqno, CP_EVENT_WRITE_0_TIMESTAMP)); + pkt.add(CP_EVENT_WRITE_0( + .event = info.raw_event, + .timestamp = info.needs_seqno, + )); } else if (CHIP >= A7XX) { - pkt.add(CP_EVENT_WRITE7_0_EVENT(info.raw_event) | - 
CP_EVENT_WRITE7_0_WRITE_SRC(esrc) | - CP_EVENT_WRITE7_0_WRITE_DST(edst) | - COND(info.needs_seqno, CP_EVENT_WRITE7_0_WRITE_ENABLED)); + pkt.add(CP_EVENT_WRITE7_0( + .event = info.raw_event, + .write_src = esrc, + .write_dst = edst, + .write_enabled = info.needs_seqno, + )); } if (info.needs_seqno) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc index b59f0f35370..86dad45ec0d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include #include "pipe/p_state.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.cc b/src/gallium/drivers/freedreno/a6xx/fd6_image.cc index eee9f113d09..12d6b9dbd40 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "pipe/p_state.h" #include "freedreno_resource.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc index 35886125033..a32d29fb40d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include #include "pipe/p_state.h" @@ -884,9 +882,6 @@ emit_vpc(fd_crb &crb, const struct program_builder *b) } } - /* if vertex_flags somehow gets optimized out, your gonna have a bad time: */ - assert(flags_regid != INVALID_REG); - switch (last_shader->type) { case MESA_SHADER_VERTEX: crb.add(A6XX_SP_VS_OUTPUT_CNTL(.out = linkage.cnt)); @@ -927,6 +922,9 @@ emit_vpc(fd_crb &crb, const struct program_builder *b) )); break; case MESA_SHADER_GEOMETRY: + /* if vertex_flags somehow gets optimized out, your gonna have a bad time: */ + assert(flags_regid != INVALID_REG); + crb.add(A6XX_SP_GS_OUTPUT_CNTL(.out = linkage.cnt, .flags_regid = 
flags_regid)); crb.add(VPC_GS_SIV_CNTL(CHIP, .layerloc = layer_loc, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_query.cc b/src/gallium/drivers/freedreno/a6xx/fd6_query.cc index 016f2db270f..181a2aa4a26 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_query.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_query.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - /* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */ #include "freedreno_query_acc.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc index d689d02415d..894d4915481 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "pipe/p_state.h" #include "util/u_memory.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_resource.cc b/src/gallium/drivers/freedreno/a6xx/fd6_resource.cc index bdc0d761ceb..ba83f746978 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_resource.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_resource.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "drm-uapi/drm_fourcc.h" #include "a6xx/fd6_blitter.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc b/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc index 2d2926b26a4..dc8723529ce 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "drm-uapi/drm_fourcc.h" #include "pipe/p_screen.h" #include "util/format/u_format.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.cc b/src/gallium/drivers/freedreno/a6xx/fd6_texture.cc index e0172e89a85..393e353ccd1 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.cc 
@@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "pipe/p_state.h" #include "util/format/u_format.h" #include "util/hash_table.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.cc b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.cc index 64ee51a1ae5..17ed3298aed 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_vsc.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_vsc.cc @@ -3,8 +3,6 @@ * SPDX-License-Identifier: MIT */ -#define FD_BO_NO_HARDPIN 1 - #include "pipe/p_state.h" #include "freedreno_batch.h" diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.cc b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.cc index 1ec97d28c22..3ff81678e5c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.cc @@ -7,8 +7,6 @@ * Rob Clark */ -#define FD_BO_NO_HARDPIN 1 - #include "pipe/p_state.h" #include "util/u_memory.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index 8e4fdc41374..57cda5ffb43 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -235,6 +235,7 @@ freedreno_c_args += cc.get_supported_arguments([ freedreno_cpp_args = [] freedreno_cpp_args += cpp.get_supported_arguments([ + '-DFD_BO_NO_HARDPIN=1', '-fno-exceptions', '-fno-rtti', '-Wno-address-of-packed-member',