Merge branch 'tu/convert-deprecated-reg-builders' into 'main'

turnip: Convert deprecated reg builders

See merge request mesa/mesa!39029
Commit 38454dec8f by Rob Clark, 2025-12-20 00:23:53 +00:00
31 changed files with 579 additions and 440 deletions
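
Background note: the series relies on a warning pragma baked into the generated REG_* macros, enabled only under clang (see the gen_header.py hunk further down). Below is a minimal, self-contained sketch of that same trick with made-up names and offsets, not the actual generated turnip headers:

#include <cstdio>

/* Same clang-only guard the generator emits; FD_NO_DEPRECATED_PACK opts out. */
#if defined(__clang__) && !defined(FD_NO_DEPRECATED_PACK)
#define MY_DEPRECATED _Pragma("GCC warning \"Deprecated reg builder\"")
#else
#define MY_DEPRECATED
#endif

/* Hypothetical register-offset macro; the generated turnip headers emit
 * similar REG_... defines for registers that have chip variants. */
#define REG_EXAMPLE_CNTL MY_DEPRECATED 0x00008c01

int main()
{
   /* Expanding the macro under clang reports "Deprecated reg builder" via
    * -W#pragma-messages; other compilers just see an integer constant. */
   printf("reg offset: 0x%08x\n", REG_EXAMPLE_CNTL);
   return 0;
}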

View file

@@ -8,6 +8,8 @@
 #include <stdint.h>
 
+#include "util/macros.h"
+
 #include "fd6_hw.h"
 
 /* In order to debug issues with usage of stale reg data we need to have
@@ -26,6 +28,9 @@
 static inline bool
 fd_reg_stomp_allowed(chip CHIP, uint16_t reg)
 {
+   PRAGMA_DIAGNOSTIC_PUSH
+   PRAGMA_DIAGNOSTIC_IGNORED_CLANG(-W#pragma-messages)
+
    switch (CHIP) {
    case A6XX: {
       switch (reg) {
@@ -77,6 +82,7 @@ fd_reg_stomp_allowed(chip CHIP, uint16_t reg)
    default: {
       UNREACHABLE("Unknown GPU");
    }
+   PRAGMA_DIAGNOSTIC_POP
    }
 
    return true;

View file

@@ -1095,7 +1095,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
             <bitfield name="DIRTY" pos="16" type="boolean"/>
             <bitfield name="DISABLE" pos="17" type="boolean"/>
             <bitfield name="DISABLE_ALL_GROUPS" pos="18" type="boolean"/>
-            <bitfield name="LOAD_IMMED" pos="19" type="boolean"/>
+            <bitfield name="LOAD_IMMED" pos="19" type="boolean" variants="A5XX"/>
             <bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/>
             <bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/>
             <bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/>

View file

@@ -121,6 +121,10 @@ def tab_to(name, value):
         tab_count = 1
     print(name + ('\t' * tab_count) + value)
 
+def define_macro(name, value, has_variants):
+    if has_variants:
+        value = "__FD_DEPRECATED " + value
+    tab_to(name, value)
 
 def mask(low, high):
     return ((0xffffffffffffffff >> (64 - (high + 1 - low))) << low)
@@ -258,11 +262,11 @@ class Bitset(object):
         # Requires using `fui()` or `_mesa_float_to_half()`
         constexpr_mark = ""
 
         if reg.bit_size == 64:
-            tab_to(" uint64_t", "unknown;")
             tab_to(" uint64_t", "qword;")
+            tab_to(" uint64_t", "unknown;")
         else:
-            tab_to(" uint32_t", "unknown;")
             tab_to(" uint32_t", "dword;")
+            tab_to(" uint32_t", "unknown;")
         print("};\n")
         if not has_variants:
@@ -407,11 +411,13 @@ class Array(object):
         print("\t\tdefault: return INVALID_IDX(idx);")
         print("\t}\n}")
         if proto == '':
-            tab_to("#define REG_%s_%s" %
-                   (self.domain, self.name), "0x%08x\n" % array_offset)
+            define_macro("#define REG_%s_%s" %
+                         (self.domain, self.name), "0x%08x\n" % array_offset,
+                         has_variants)
         else:
-            tab_to("#define REG_%s_%s(%s)" % (self.domain, self.name,
-                   proto), "(0x%08x + %s )\n" % (array_offset, strides))
+            define_macro("#define REG_%s_%s(%s)" % (self.domain, self.name,
+                         proto), "(0x%08x + %s )\n" % (array_offset, strides),
+                         has_variants)
 
     def dump_pack_struct(self, has_variants):
         pass
@@ -466,10 +472,13 @@ class Reg(object):
         strides = indices_strides(self.indices())
         offset = self.total_offset()
         if proto == '':
-            tab_to("#define REG_%s" % self.full_name, "0x%08x" % offset)
+            define_macro("#define REG_%s" % self.full_name, "0x%08x" % offset, has_variants)
         elif not has_variants:
-            print("static CONSTEXPR inline uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (
-                self.full_name, proto, offset, strides))
+            depcrstr = ""
+            if has_variants:
+                depcrstr = " __FD_DEPRECATED "
+            print("static CONSTEXPR inline%s uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (
+                depcrstr, self.full_name, proto, offset, strides))
 
         if self.bitset.inline:
             self.bitset.dump(has_variants, self.full_name, self)
@@ -980,6 +989,15 @@ def dump_c(args, guard, func):
     print("#endif")
     print()
 
+    # TODO figure out what to do about fd_reg_stomp_allowed()
+    # vs gcc.. for now only enable the warnings with clang:
+    print("#if defined(__clang__) && !defined(FD_NO_DEPRECATED_PACK)")
+    print("#define __FD_DEPRECATED _Pragma (\"GCC warning \\\"Deprecated reg builder\\\"\")")
+    print("#else")
+    print("#define __FD_DEPRECATED")
+    print("#endif")
+    print()
+
     func(p)
 
     print("#endif /* %s */" % guard)

View file

@@ -484,21 +484,25 @@ r2d_setup_common(struct tu_cmd_buffer *cmd,
    tu_cs_emit_pkt4(cs, REG_A6XX_RB_A2D_PIXEL_CNTL, 1);
    tu_cs_emit(cs, unknown_8c01); // TODO: seem to be always 0 on A7XX
 
-   uint32_t blit_cntl = A6XX_RB_A2D_BLT_CNTL(
+   tu_cs_emit_regs(cs, A6XX_RB_A2D_BLT_CNTL(
       .rotate = (enum a6xx_rotation) blit_param,
       .solid_color = clear,
       .color_format = fmt,
       .scissor = scissor,
       .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
       .mask = 0xf,
       .ifmt = util_format_is_srgb(dst_format) ? R2D_UNORM8_SRGB : ifmt,
-   ).value;
-
-   tu_cs_emit_pkt4(cs, REG_A6XX_RB_A2D_BLT_CNTL, 1);
-   tu_cs_emit(cs, blit_cntl);
+   ));
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_A2D_BLT_CNTL, 1);
-   tu_cs_emit(cs, blit_cntl);
+   tu_cs_emit_regs(cs, GRAS_A2D_BLT_CNTL(CHIP,
+      .rotate = (enum a6xx_rotation) blit_param,
+      .solid_color = clear,
+      .color_format = fmt,
+      .scissor = scissor,
+      .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
+      .mask = 0xf,
+      .ifmt = util_format_is_srgb(dst_format) ? R2D_UNORM8_SRGB : ifmt,
+   ));
 
    if (CHIP > A6XX) {
       tu_cs_emit_regs(cs, TPL1_A2D_BLT_CNTL(CHIP, .raw_copy = false,
@@ -871,16 +875,12 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, enum r3d_type type,
                    .cs_bindless = CHIP == A6XX ? 0x1f : 0xff,
                    .gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,));
 
-   tu_crb crb = cs->crb(2 * 5 + 2 * 11);
-   tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_VERTEX, vs);
-   tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_TESS_CTRL, NULL);
-   tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_TESS_EVAL, NULL);
-   tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_GEOMETRY, NULL);
-   tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_FRAGMENT, fs);
-   struct tu_pvtmem_config pvtmem = {};
-   tu6_emit_xs(crb, cs->device, MESA_SHADER_VERTEX, vs, &pvtmem, vs_iova);
-   tu6_emit_xs(crb, cs->device, MESA_SHADER_FRAGMENT, fs, &pvtmem, fs_iova);
-   crb.flush();
+   with_crb (cs, 2 * 5 + 2 * 11) {
+      tu6_emit_xs_config<CHIP>(crb, { .vs = vs, .fs = fs });
+      struct tu_pvtmem_config pvtmem = {};
+      tu6_emit_xs(crb, cs->device, MESA_SHADER_VERTEX, vs, &pvtmem, vs_iova);
+      tu6_emit_xs(crb, cs->device, MESA_SHADER_FRAGMENT, fs, &pvtmem, fs_iova);
+   }
 
    tu6_emit_xs_constants(cs, MESA_SHADER_VERTEX, vs, vs_iova);
    tu6_emit_xs_constants(cs, MESA_SHADER_FRAGMENT, fs, fs_iova);
@@ -5311,12 +5311,12 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
     * save/restore them dynamically.
     */
    tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1);
-   tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A6XX_RB_CNTL) |
+   tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(RB_CNTL(CHIP).reg) |
                   CP_REG_TO_SCRATCH_0_SCRATCH(0) |
                   CP_REG_TO_SCRATCH_0_CNT(1 - 1));
    if (CHIP >= A7XX) {
       tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1);
-      tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A7XX_RB_BUFFER_CNTL) |
+      tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(RB_BUFFER_CNTL(CHIP).reg) |
                      CP_REG_TO_SCRATCH_0_SCRATCH(1) |
                      CP_REG_TO_SCRATCH_0_CNT(1 - 1));
    }
@@ -5357,18 +5357,18 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
    /* Restore RB_CNTL/GRAS_SC_BIN_CNTL saved above. */
    tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
-   tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_RB_CNTL) |
+   tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(RB_CNTL(CHIP).reg) |
                   CP_SCRATCH_TO_REG_0_SCRATCH(0) |
                   CP_SCRATCH_TO_REG_0_CNT(1 - 1));
 
    tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
-   tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_GRAS_SC_BIN_CNTL) |
+   tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(GRAS_SC_BIN_CNTL(CHIP).reg) |
                   CP_SCRATCH_TO_REG_0_SCRATCH(0) |
                   CP_SCRATCH_TO_REG_0_CNT(1 - 1));
 
    if (CHIP >= A7XX) {
       tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
-      tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A7XX_RB_BUFFER_CNTL) |
+      tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(RB_BUFFER_CNTL(CHIP).reg) |
                      CP_SCRATCH_TO_REG_0_SCRATCH(1) |
                      CP_SCRATCH_TO_REG_0_CNT(1 - 1));
    }

View file

@ -671,17 +671,32 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
&cmd->state.pass->attachments[a]; &cmd->state.pass->attachments[a];
enum a6xx_depth_format fmt = tu6_pipe2depth(attachment->format); enum a6xx_depth_format fmt = tu6_pipe2depth(attachment->format);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6); unsigned depth_pitch, depth_array_pitch;
tu_cs_emit(cs, RB_DEPTH_BUFFER_INFO(CHIP, uint64_t depth_base;
.depth_format = fmt,
.tilemode = TILE6_3, if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
.losslesscompen = iview->view.ubwc_enabled, depth_pitch = iview->depth_pitch;
).value); depth_array_pitch = iview->depth_layer_size;
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) depth_base = iview->depth_base_addr;
tu_cs_image_depth_ref(cs, iview, 0); } else {
else depth_pitch = iview->view.pitch;
tu_cs_image_ref(cs, &iview->view, 0); depth_array_pitch = iview->view.layer_size;
tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0)); depth_base = tu_layer_address(&iview->view, 0);
}
tu_cs_emit_regs(cs,
RB_DEPTH_BUFFER_INFO(CHIP,
.depth_format = fmt,
.tilemode = TILE6_3,
.losslesscompen = iview->view.ubwc_enabled,
),
A6XX_RB_DEPTH_BUFFER_PITCH(depth_pitch),
A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(depth_array_pitch),
A6XX_RB_DEPTH_BUFFER_BASE(depth_base),
A6XX_RB_DEPTH_GMEM_BASE(
tu_attachment_gmem_offset(cmd, attachment, 0)
),
);
tu_cs_emit_regs(cs, GRAS_SU_DEPTH_BUFFER_INFO(CHIP, .depth_format = fmt)); tu_cs_emit_regs(cs, GRAS_SU_DEPTH_BUFFER_INFO(CHIP, .depth_format = fmt));
@ -691,18 +706,31 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT || if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
attachment->format == VK_FORMAT_S8_UINT) { attachment->format == VK_FORMAT_S8_UINT) {
tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_BUFFER_INFO, 6); unsigned stencil_pitch, stencil_array_pitch, stencil_gmem_offset;
tu_cs_emit(cs, RB_STENCIL_BUFFER_INFO(CHIP, uint64_t stencil_base;
.separate_stencil = true,
.tilemode = TILE6_3,
).value);
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
tu_cs_image_stencil_ref(cs, iview, 0); stencil_pitch = iview->stencil_pitch;
tu_cs_emit(cs, tu_attachment_gmem_offset_stencil(cmd, attachment, 0)); stencil_array_pitch = iview->stencil_layer_size;
stencil_base = iview->stencil_base_addr;
stencil_gmem_offset = tu_attachment_gmem_offset_stencil(cmd, attachment, 0);
} else { } else {
tu_cs_image_ref(cs, &iview->view, 0); stencil_pitch = iview->view.pitch;
tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0)); stencil_array_pitch = iview->view.layer_size;
stencil_base = tu_layer_address(&iview->view, 0);
stencil_gmem_offset = tu_attachment_gmem_offset(cmd, attachment, 0);
} }
tu_cs_emit_regs(cs,
RB_STENCIL_BUFFER_INFO(CHIP,
.separate_stencil = true,
.tilemode = TILE6_3,
),
A6XX_RB_STENCIL_BUFFER_PITCH(stencil_pitch),
A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(stencil_array_pitch),
A6XX_RB_STENCIL_BUFFER_BASE(stencil_base),
A6XX_RB_STENCIL_GMEM_BASE(stencil_gmem_offset),
);
} else { } else {
tu_cs_emit_regs(cs, tu_cs_emit_regs(cs,
RB_STENCIL_BUFFER_INFO(CHIP, 0)); RB_STENCIL_BUFFER_INFO(CHIP, 0));
@@ -898,7 +926,7 @@ tu6_emit_render_cntl<A6XX>(struct tu_cmd_buffer *cmd,
    }
 
    if (no_track) {
-      tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CNTL, 1);
+      tu_cs_emit_pkt4(cs, RB_RENDER_CNTL(A6XX).reg, 1);
       tu_cs_emit(cs, cntl);
       return;
    }
@@ -917,7 +945,7 @@ tu6_emit_render_cntl<A6XX>(struct tu_cmd_buffer *cmd,
    tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
    tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
-   tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL);
+   tu_cs_emit(cs, RB_RENDER_CNTL(A6XX).reg);
    tu_cs_emit(cs, cntl);
 }
@@ -2033,13 +2061,13 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_NC_MODE_CNTL_2, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_PERFCTR_SHADER_MASK, 0x3f);
    if (CHIP == A6XX && !cs->device->physical_device->info->props.is_a702)
-      tu_cs_emit_write_reg(cs, REG_A6XX_TPL1_UNKNOWN_B605, 0x44);
+      tu_cs_emit_regs(cs, TPL1_UNKNOWN_B605(CHIP, .dword = 0x44));
    if (CHIP == A6XX) {
-      tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
-      tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0);
+      tu_cs_emit_regs(cs, HLSQ_UNKNOWN_BE00(CHIP, .dword = 0x80));
+      tu_cs_emit_regs(cs, HLSQ_UNKNOWN_BE01(CHIP));
    }
-   tu_cs_emit_write_reg(cs, REG_A6XX_SP_GFX_USIZE, 0); // 2 on a740 ???
+   tu_cs_emit_regs(cs, SP_GFX_USIZE(CHIP));
    tu_cs_emit_write_reg(cs, REG_A6XX_TPL1_PS_ROTATION_CNTL, 0);
    if (CHIP == A6XX)
       tu_cs_emit_regs(cs, HLSQ_SHARED_CONSTS(CHIP, .enable = false));
@@ -2062,9 +2090,10 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
       tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0);
       tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0);
       tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
+      tu_cs_emit_regs(cs, RB_UNKNOWN_88F0(CHIP));
    }
-   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);
 
    tu_cs_emit_regs(cs, VPC_REPLACE_MODE_CNTL(CHIP, false));
    tu_cs_emit_regs(cs, VPC_ROTATION_CNTL(CHIP));
@@ -2078,10 +2107,10 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
       tu_cs_emit_regs(cs, GRAS_SU_CONSERVATIVE_RAS_CNTL(CHIP, 0));
       tu_cs_emit_regs(cs, PC_DGEN_SU_CONSERVATIVE_RAS_CNTL(CHIP));
-      tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
-      tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
+      tu_cs_emit_regs(cs, VPC_UNKNOWN_9210(CHIP));
+      tu_cs_emit_regs(cs, VPC_UNKNOWN_9211(CHIP));
    }
 
-   tu_cs_emit_write_reg(cs, REG_A6XX_VPC_LB_MODE_CNTL, 0);
+   tu_cs_emit_regs(cs, VPC_LB_MODE_CNTL(CHIP));
    tu_cs_emit_regs(cs, PC_CONTEXT_SWITCH_GFX_PREEMPTION_MODE(CHIP));
    tu_cs_emit_regs(cs, A6XX_TPL1_MODE_CNTL(.isammode = ISAMMODE_GL,
       .texcoordroundmode = dev->instance->use_tex_coord_round_nearest_even_mode
@@ -2152,9 +2181,10 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
     * zero-instance draw calls. See IR3_CONST_ALLOC_DRIVER_PARAMS allocation
     * for more info.
     */
-   tu_cs_emit_pkt4(
-      cs, CHIP == A6XX ? REG_A6XX_SP_VS_CONST_CONFIG : REG_A7XX_SP_VS_CONST_CONFIG, 1);
-   tu_cs_emit(cs, A6XX_SP_VS_CONST_CONFIG_CONSTLEN(8) | A6XX_SP_VS_CONST_CONFIG_ENABLED);
+   tu_cs_emit_regs(cs, SP_VS_CONST_CONFIG(CHIP,
+      .constlen = 8,
+      .enabled = true,
+   ));
 }
/* Emit the bin restore preamble, which runs in between bins when L1 /* Emit the bin restore preamble, which runs in between bins when L1
@ -9096,7 +9126,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
* previous dispatches to finish. * previous dispatches to finish.
*/ */
tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3); tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_SP_CS_NDRANGE_1)); tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(SP_CS_NDRANGE_1(CHIP).reg));
tu_cs_emit_qw(cs, info->indirect); tu_cs_emit_qw(cs, info->indirect);
tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2); tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2);
@ -9121,7 +9151,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
CP_REG_RMW_0_SKIP_WAIT_FOR_ME | CP_REG_RMW_0_SKIP_WAIT_FOR_ME |
CP_REG_RMW_0_SRC0_IS_REG | CP_REG_RMW_0_SRC0_IS_REG |
CP_REG_RMW_0_SRC1_ADD); CP_REG_RMW_0_SRC1_ADD);
tu_cs_emit(cs, REG_A7XX_SP_CS_NDRANGE_1); /* SRC0 */ tu_cs_emit(cs, SP_CS_NDRANGE_1(CHIP).reg); /* SRC0 */
tu_cs_emit(cs, -1); /* SRC1 */ tu_cs_emit(cs, -1); /* SRC1 */
/* scratch0 = ((scratch0 & (local_size - 1)) rot 2 /* scratch0 = ((scratch0 & (local_size - 1)) rot 2
@ -9139,7 +9169,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
/* write scratch0 to SP_CS_NDRANGE_7 */ /* write scratch0 to SP_CS_NDRANGE_7 */
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
tu_cs_emit(cs, tu_cs_emit(cs,
CP_SCRATCH_TO_REG_0_REG(REG_A7XX_SP_CS_NDRANGE_7) | CP_SCRATCH_TO_REG_0_REG(SP_CS_NDRANGE_7(CHIP).reg) |
CP_SCRATCH_TO_REG_0_SCRATCH(0)); CP_SCRATCH_TO_REG_0_SCRATCH(0));
tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2); tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2);
@ -9157,7 +9187,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
CP_REG_RMW_0_SKIP_WAIT_FOR_ME | CP_REG_RMW_0_SKIP_WAIT_FOR_ME |
CP_REG_RMW_0_SRC0_IS_REG | CP_REG_RMW_0_SRC0_IS_REG |
CP_REG_RMW_0_SRC1_ADD); CP_REG_RMW_0_SRC1_ADD);
tu_cs_emit(cs, REG_A7XX_SP_CS_NDRANGE_1); /* SRC0 */ tu_cs_emit(cs, SP_CS_NDRANGE_1(CHIP).reg); /* SRC0 */
tu_cs_emit(cs, local_size[0] - 1); /* SRC1 */ tu_cs_emit(cs, local_size[0] - 1); /* SRC1 */
unsigned local_size_log2 = util_logbase2(local_size[0]); unsigned local_size_log2 = util_logbase2(local_size[0]);
@ -9179,7 +9209,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
/* write scratch0 to SP_CS_KERNEL_GROUP_X */ /* write scratch0 to SP_CS_KERNEL_GROUP_X */
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
tu_cs_emit(cs, tu_cs_emit(cs,
CP_SCRATCH_TO_REG_0_REG(REG_A7XX_SP_CS_KERNEL_GROUP_X) | CP_SCRATCH_TO_REG_0_REG(SP_CS_KERNEL_GROUP_X(CHIP).reg) |
CP_SCRATCH_TO_REG_0_SCRATCH(0)); CP_SCRATCH_TO_REG_0_SCRATCH(0));
} else { } else {
tu_cs_emit_regs(cs, tu_cs_emit_regs(cs,

View file

@@ -147,30 +147,6 @@ tu_layer_flag_address(const struct fdl6_view *iview, uint32_t layer)
    return iview->ubwc_addr + iview->ubwc_layer_size * layer;
 }
 
-void
-tu_cs_image_ref(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer)
-{
-   tu_cs_emit(cs, A6XX_RB_MRT_PITCH(0, iview->pitch).value);
-   tu_cs_emit(cs, iview->layer_size >> 6);
-   tu_cs_emit_qw(cs, tu_layer_address(iview, layer));
-}
-
-void
-tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
-{
-   tu_cs_emit(cs, A6XX_RB_STENCIL_BUFFER_PITCH(iview->stencil_pitch).value);
-   tu_cs_emit(cs, iview->stencil_layer_size >> 6);
-   tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
-}
-
-void
-tu_cs_image_depth_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
-{
-   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(iview->depth_pitch).value);
-   tu_cs_emit(cs, iview->depth_layer_size >> 6);
-   tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer);
-}
-
 template <chip CHIP>
 void
 tu_cs_image_ref_2d(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, bool src)

View file

@@ -732,14 +732,14 @@ tu_lrz_before_sysmem_br(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu_cs_emit(cs, if_dwords + 1);
 
    /* GRAS_LRZ_DEPTH_CLEAR = lrz_fc->buffer[1].depth_clear_val */
    tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
-   tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_GRAS_LRZ_DEPTH_CLEAR));
+   tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(GRAS_LRZ_DEPTH_CLEAR(CHIP).reg));
    tu_cs_emit_qw(cs, lrz_fc_iova + offsetof(fd_lrzfc_layout<A7XX>,
                                             buffer[1].depth_clear_val));
 
    /* } else { */
    tu_cs_emit_pkt7(cs, CP_NOP, else_dwords);
 
    /* GRAS_LRZ_DEPTH_CLEAR = lrz_fc->buffer[0].depth_clear_val */
    tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
-   tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_GRAS_LRZ_DEPTH_CLEAR));
+   tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(GRAS_LRZ_DEPTH_CLEAR(CHIP).reg));
    tu_cs_emit_qw(cs, lrz_fc_iova + offsetof(fd_lrzfc_layout<A7XX>,
                                             buffer[0].depth_clear_val));
    /* } */

View file

@ -338,71 +338,72 @@ tu_push_consts_type(const struct tu_pipeline_layout *layout,
} }
} }
template <chip CHIP> static uint32_t
struct xs_config { sp_xs_config(const struct ir3_shader_variant *v)
uint16_t reg_sp_xs_config; {
uint16_t reg_hlsq_xs_ctrl; if (!v)
}; return 0;
template <chip CHIP> return A6XX_SP_VS_CONFIG_ENABLED |
static const xs_config<CHIP> xs_configs[] = { COND(v->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
[MESA_SHADER_VERTEX] = { COND(v->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
REG_A6XX_SP_VS_CONFIG, COND(v->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_UAV) |
CHIP == A6XX ? REG_A6XX_SP_VS_CONST_CONFIG : REG_A7XX_SP_VS_CONST_CONFIG, COND(v->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
}, A6XX_SP_VS_CONFIG_NUAV(ir3_shader_num_uavs(v)) |
[MESA_SHADER_TESS_CTRL] = { A6XX_SP_VS_CONFIG_NTEX(v->num_samp) |
REG_A6XX_SP_HS_CONFIG, A6XX_SP_VS_CONFIG_NSAMP(v->num_samp);
CHIP == A6XX ? REG_A6XX_SP_HS_CONST_CONFIG : REG_A7XX_SP_HS_CONST_CONFIG, }
},
[MESA_SHADER_TESS_EVAL] = { static bool
REG_A6XX_SP_DS_CONFIG, push_shared_consts(const struct ir3_shader_variant *v)
CHIP == A6XX ? REG_A6XX_SP_DS_CONST_CONFIG : REG_A7XX_SP_DS_CONST_CONFIG, {
}, return v && v->shader_options.push_consts_type == IR3_PUSH_CONSTS_SHARED_PREAMBLE;
[MESA_SHADER_GEOMETRY] = { }
REG_A6XX_SP_GS_CONFIG,
CHIP == A6XX ? REG_A6XX_SP_GS_CONST_CONFIG : REG_A7XX_SP_GS_CONST_CONFIG,
},
[MESA_SHADER_FRAGMENT] = {
REG_A6XX_SP_PS_CONFIG,
CHIP == A6XX ? REG_A6XX_SP_PS_CONST_CONFIG : REG_A7XX_SP_PS_CONST_CONFIG,
},
[MESA_SHADER_COMPUTE] = {
REG_A6XX_SP_CS_CONFIG,
CHIP == A6XX ? REG_A6XX_SP_CS_CONST_CONFIG : REG_A7XX_SP_CS_CONST_CONFIG,
},
};
template <chip CHIP> template <chip CHIP>
void void
tu6_emit_xs_config(struct tu_crb &crb, tu6_emit_xs_config(struct tu_crb &crb, struct tu_shader_stages stages)
mesa_shader_stage stage, /* xs->type, but xs may be NULL */
const struct ir3_shader_variant *xs)
{ {
const struct xs_config<CHIP> *cfg = &xs_configs<CHIP>[stage]; if (stages.cs) {
crb.add(SP_CS_CONST_CONFIG(CHIP,
.constlen = stages.cs->constlen,
.enabled = true,
.read_imm_shared_consts = push_shared_consts(stages.cs),
));
crb.add(A6XX_SP_CS_CONFIG(.dword = sp_xs_config(stages.cs)));
} else {
crb.add(SP_VS_CONST_CONFIG(CHIP,
.constlen = COND(stages.vs, stages.vs->constlen),
.enabled = stages.vs,
.read_imm_shared_consts = push_shared_consts(stages.vs),
));
crb.add(SP_HS_CONST_CONFIG(CHIP,
.constlen = COND(stages.hs, stages.hs->constlen),
.enabled = stages.hs,
.read_imm_shared_consts = push_shared_consts(stages.hs),
));
crb.add(SP_DS_CONST_CONFIG(CHIP,
.constlen = COND(stages.ds, stages.ds->constlen),
.enabled = stages.ds,
.read_imm_shared_consts = push_shared_consts(stages.ds),
));
crb.add(SP_GS_CONST_CONFIG(CHIP,
.constlen = COND(stages.gs, stages.gs->constlen),
.enabled = stages.gs,
.read_imm_shared_consts = push_shared_consts(stages.gs),
));
crb.add(SP_PS_CONST_CONFIG(CHIP,
.constlen = COND(stages.fs, stages.fs->constlen),
.enabled = stages.fs,
.read_imm_shared_consts = push_shared_consts(stages.fs),
));
if (!xs) { crb.add(A6XX_SP_VS_CONFIG(.dword = sp_xs_config(stages.vs)));
/* shader stage disabled */ crb.add(A6XX_SP_HS_CONFIG(.dword = sp_xs_config(stages.hs)));
crb.add(tu_reg_value { .reg = cfg->reg_sp_xs_config, .value = 0 }); crb.add(A6XX_SP_DS_CONFIG(.dword = sp_xs_config(stages.ds)));
crb.add(tu_reg_value { .reg = cfg->reg_hlsq_xs_ctrl, .value = 0 }); crb.add(A6XX_SP_GS_CONFIG(.dword = sp_xs_config(stages.gs)));
return; crb.add(A6XX_SP_PS_CONFIG(.dword = sp_xs_config(stages.fs)));
} }
crb.add(tu_reg_value {
.reg = cfg->reg_sp_xs_config,
.value = A6XX_SP_VS_CONFIG_ENABLED |
COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_UAV) |
COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) |
A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp) });
crb.add(tu_reg_value {
.reg = cfg->reg_hlsq_xs_ctrl,
.value = A6XX_SP_VS_CONST_CONFIG_CONSTLEN(xs->constlen) |
A6XX_SP_VS_CONST_CONFIG_ENABLED |
COND(xs->shader_options.push_consts_type ==
IR3_PUSH_CONSTS_SHARED_PREAMBLE,
A7XX_SP_VS_CONST_CONFIG_READ_IMM_SHARED_CONSTS) });
} }
TU_GENX(tu6_emit_xs_config); TU_GENX(tu6_emit_xs_config);
@ -782,73 +783,6 @@ tu6_emit_vpc(struct tu_cs *cs,
const struct ir3_shader_variant *gs, const struct ir3_shader_variant *gs,
const struct ir3_shader_variant *fs) const struct ir3_shader_variant *fs)
{ {
/* note: doesn't compile as static because of the array regs.. */
const struct reg_config {
uint16_t reg_sp_xs_out_reg;
uint16_t reg_sp_xs_vpc_dst_reg;
uint16_t reg_vpc_xs_pack;
uint16_t reg_vpc_xs_clip_cntl;
uint16_t reg_vpc_xs_clip_cntl_v2;
uint16_t reg_gras_xs_cl_cntl;
uint16_t reg_pc_xs_out_cntl;
uint16_t reg_sp_xs_primitive_cntl;
uint16_t reg_vpc_xs_layer_cntl;
uint16_t reg_vpc_xs_layer_cntl_v2;
uint16_t reg_gras_xs_layer_cntl;
} reg_config[] = {
[MESA_SHADER_VERTEX] = {
REG_A6XX_SP_VS_OUTPUT_REG(0),
REG_A6XX_SP_VS_VPC_DEST_REG(0),
REG_A6XX_VPC_VS_CNTL,
REG_A6XX_VPC_VS_CLIP_CULL_CNTL,
REG_A6XX_VPC_VS_CLIP_CULL_CNTL_V2,
REG_A6XX_GRAS_CL_VS_CLIP_CULL_DISTANCE,
REG_A6XX_PC_VS_CNTL,
REG_A6XX_SP_VS_OUTPUT_CNTL,
REG_A6XX_VPC_VS_SIV_CNTL,
REG_A6XX_VPC_VS_SIV_CNTL_V2,
REG_A6XX_GRAS_SU_VS_SIV_CNTL,
},
[MESA_SHADER_TESS_CTRL] = {
0,
0,
0,
0,
0,
0,
REG_A6XX_PC_HS_CNTL,
0,
0,
0
},
[MESA_SHADER_TESS_EVAL] = {
REG_A6XX_SP_DS_OUTPUT_REG(0),
REG_A6XX_SP_DS_VPC_DEST_REG(0),
REG_A6XX_VPC_DS_CNTL,
REG_A6XX_VPC_DS_CLIP_CULL_CNTL,
REG_A6XX_VPC_DS_CLIP_CULL_CNTL_V2,
REG_A6XX_GRAS_CL_DS_CLIP_CULL_DISTANCE,
REG_A6XX_PC_DS_CNTL,
REG_A6XX_SP_DS_OUTPUT_CNTL,
REG_A6XX_VPC_DS_SIV_CNTL,
REG_A6XX_VPC_DS_SIV_CNTL_V2,
REG_A6XX_GRAS_SU_DS_SIV_CNTL,
},
[MESA_SHADER_GEOMETRY] = {
REG_A6XX_SP_GS_OUTPUT_REG(0),
REG_A6XX_SP_GS_VPC_DEST_REG(0),
REG_A6XX_VPC_GS_CNTL,
REG_A6XX_VPC_GS_CLIP_CULL_CNTL,
REG_A6XX_VPC_GS_CLIP_CULL_CNTL_V2,
REG_A6XX_GRAS_CL_GS_CLIP_CULL_DISTANCE,
REG_A6XX_PC_GS_CNTL,
REG_A6XX_SP_GS_OUTPUT_CNTL,
REG_A6XX_VPC_GS_SIV_CNTL,
REG_A6XX_VPC_GS_SIV_CNTL_V2,
REG_A6XX_GRAS_SU_GS_SIV_CNTL,
},
};
const struct ir3_shader_variant *last_shader; const struct ir3_shader_variant *last_shader;
if (gs) { if (gs) {
last_shader = gs; last_shader = gs;
@ -858,8 +792,6 @@ tu6_emit_vpc(struct tu_cs *cs,
last_shader = vs; last_shader = vs;
} }
const struct reg_config *cfg = &reg_config[last_shader->type];
struct ir3_shader_linkage linkage = { struct ir3_shader_linkage linkage = {
.primid_loc = 0xff, .primid_loc = 0xff,
.clip0_loc = 0xff, .clip0_loc = 0xff,
@ -961,6 +893,8 @@ tu6_emit_vpc(struct tu_cs *cs,
if (linkage.cnt == 0) if (linkage.cnt == 0)
ir3_link_add(&linkage, 0, 0, 0x1, linkage.max_loc); ir3_link_add(&linkage, 0, 0, 0x1, linkage.max_loc);
tu6_emit_vpc_varying_modes<CHIP>(cs, fs, last_shader);
/* map outputs of the last shader to VPC */ /* map outputs of the last shader to VPC */
assert(linkage.cnt <= 32); assert(linkage.cnt <= 32);
const uint32_t sp_out_count = DIV_ROUND_UP(linkage.cnt, 2); const uint32_t sp_out_count = DIV_ROUND_UP(linkage.cnt, 2);
@ -975,30 +909,121 @@ tu6_emit_vpc(struct tu_cs *cs,
A6XX_SP_VS_VPC_DEST_REG_OUTLOC0(linkage.var[i].loc); A6XX_SP_VS_VPC_DEST_REG_OUTLOC0(linkage.var[i].loc);
} }
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count); tu_crb crb = cs->crb(sp_out_count + sp_vpc_dst_count + 12);
tu_cs_emit_array(cs, sp_out, sp_out_count); uint32_t *regs;
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count); switch (last_shader->type) {
tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count); case MESA_SHADER_VERTEX:
regs = (uint32_t *)sp_out;
for (unsigned i = 0; i < sp_out_count; i++)
crb.add(A6XX_SP_VS_OUTPUT_REG(i, .dword = regs[i]));
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1); regs = (uint32_t *)sp_vpc_dst;
tu_cs_emit(cs, A6XX_VPC_VS_CNTL_POSITIONLOC(position_loc) | for (unsigned i = 0; i < sp_vpc_dst_count; i++)
A6XX_VPC_VS_CNTL_PSIZELOC(pointsize_loc) | crb.add(A6XX_SP_VS_VPC_DEST_REG(i, .dword = regs[i]));
A6XX_VPC_VS_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
A6XX_VPC_VS_CNTL_EXTRAPOS(extra_pos));
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1); crb.add(VPC_VS_CNTL(CHIP,
tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_MASK(clip_cull_mask) | .stride_in_vpc = linkage.max_loc,
A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_03_LOC(clip0_loc) | .positionloc = position_loc,
A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_47_LOC(clip1_loc)); .psizeloc = pointsize_loc,
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl_v2, 1); .extrapos = extra_pos,
tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_MASK(clip_cull_mask) | ));
A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_47_LOC(clip1_loc));
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1); crb.add(VPC_VS_CLIP_CULL_CNTL(CHIP,
tu_cs_emit(cs, A6XX_GRAS_CL_VS_CLIP_CULL_DISTANCE_CLIP_MASK(last_shader->clip_mask) | .clip_mask = clip_cull_mask,
A6XX_GRAS_CL_VS_CLIP_CULL_DISTANCE_CULL_MASK(last_shader->cull_mask)); .clip_dist_03_loc = clip0_loc,
.clip_dist_47_loc = clip1_loc,
));
if (CHIP <= A7XX) {
crb.add(VPC_VS_CLIP_CULL_CNTL_V2(CHIP,
.clip_mask = clip_cull_mask,
.clip_dist_03_loc = clip0_loc,
.clip_dist_47_loc = clip1_loc,
));
}
crb.add(GRAS_CL_VS_CLIP_CULL_DISTANCE(CHIP,
.clip_mask = last_shader->clip_mask,
.cull_mask = last_shader->cull_mask,
));
break;
case MESA_SHADER_TESS_EVAL:
regs = (uint32_t *)sp_out;
for (unsigned i = 0; i < sp_out_count; i++)
crb.add(A6XX_SP_DS_OUTPUT_REG(i, .dword = regs[i]));
regs = (uint32_t *)sp_vpc_dst;
for (unsigned i = 0; i < sp_vpc_dst_count; i++)
crb.add(A6XX_SP_DS_VPC_DEST_REG(i, .dword = regs[i]));
crb.add(VPC_DS_CNTL(CHIP,
.stride_in_vpc = linkage.max_loc,
.positionloc = position_loc,
.psizeloc = pointsize_loc,
.extrapos = extra_pos,
));
crb.add(VPC_DS_CLIP_CULL_CNTL(CHIP,
.clip_mask = clip_cull_mask,
.clip_dist_03_loc = clip0_loc,
.clip_dist_47_loc = clip1_loc,
));
if (CHIP <= A7XX) {
crb.add(VPC_DS_CLIP_CULL_CNTL_V2(CHIP,
.clip_mask = clip_cull_mask,
.clip_dist_03_loc = clip0_loc,
.clip_dist_47_loc = clip1_loc,
));
}
crb.add(GRAS_CL_DS_CLIP_CULL_DISTANCE(CHIP,
.clip_mask = last_shader->clip_mask,
.cull_mask = last_shader->cull_mask,
));
break;
case MESA_SHADER_GEOMETRY:
regs = (uint32_t *)sp_out;
for (unsigned i = 0; i < sp_out_count; i++)
crb.add(A6XX_SP_GS_OUTPUT_REG(i, .dword = regs[i]));
regs = (uint32_t *)sp_vpc_dst;
for (unsigned i = 0; i < sp_vpc_dst_count; i++)
crb.add(A6XX_SP_GS_VPC_DEST_REG(i, .dword = regs[i]));
crb.add(VPC_GS_CNTL(CHIP,
.stride_in_vpc = linkage.max_loc,
.positionloc = position_loc,
.psizeloc = pointsize_loc,
.extrapos = extra_pos,
));
crb.add(VPC_GS_CLIP_CULL_CNTL(CHIP,
.clip_mask = clip_cull_mask,
.clip_dist_03_loc = clip0_loc,
.clip_dist_47_loc = clip1_loc,
));
if (CHIP <= A7XX) {
crb.add(VPC_GS_CLIP_CULL_CNTL_V2(CHIP,
.clip_mask = clip_cull_mask,
.clip_dist_03_loc = clip0_loc,
.clip_dist_47_loc = clip1_loc,
));
}
crb.add(GRAS_CL_GS_CLIP_CULL_DISTANCE(CHIP,
.clip_mask = last_shader->clip_mask,
.cull_mask = last_shader->cull_mask,
));
break;
default:
UNREACHABLE("bad last_shader type");
}
const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs }; const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs };
@ -1009,18 +1034,50 @@ tu6_emit_vpc(struct tu_cs *cs,
bool primid = shader->type != MESA_SHADER_VERTEX && bool primid = shader->type != MESA_SHADER_VERTEX &&
VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID)); VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID));
bool last = shader == last_shader;
tu_cs_emit_pkt4(cs, reg_config[shader->type].reg_pc_xs_out_cntl, 1);
if (shader == last_shader) { switch (shader->type) {
tu_cs_emit(cs, A6XX_PC_VS_CNTL_STRIDE_IN_VPC(linkage.max_loc) | case MESA_SHADER_VERTEX:
CONDREG(pointsize_regid, A6XX_PC_VS_CNTL_PSIZE) | crb.add(PC_VS_CNTL(CHIP,
CONDREG(layer_regid, A6XX_PC_VS_CNTL_LAYER) | .stride_in_vpc = COND(last, linkage.max_loc),
CONDREG(view_regid, A6XX_PC_VS_CNTL_VIEW) | .psize = COND(last, VALIDREG(pointsize_regid)),
COND(primid, A6XX_PC_VS_CNTL_PRIMITIVE_ID) | .layer = COND(last, VALIDREG(layer_regid)),
A6XX_PC_VS_CNTL_CLIP_MASK(clip_cull_mask) | .view = COND(last, VALIDREG(view_regid)),
CONDREG(shading_rate_regid, A6XX_PC_VS_CNTL_SHADINGRATE)); .primitive_id = primid,
} else { .clip_mask = COND(last, clip_cull_mask),
tu_cs_emit(cs, COND(primid, A6XX_PC_VS_CNTL_PRIMITIVE_ID)); .shadingrate = COND(last, VALIDREG(shading_rate_regid)),
));
break;
case MESA_SHADER_TESS_CTRL:
assert(!last);
crb.add(PC_HS_CNTL(CHIP,
.primitive_id = primid,
));
case MESA_SHADER_TESS_EVAL:
crb.add(PC_DS_CNTL(CHIP,
.stride_in_vpc = COND(last, linkage.max_loc),
.psize = COND(last, VALIDREG(pointsize_regid)),
.layer = COND(last, VALIDREG(layer_regid)),
.view = COND(last, VALIDREG(view_regid)),
.primitive_id = primid,
.clip_mask = COND(last, clip_cull_mask),
.shadingrate = COND(last, VALIDREG(shading_rate_regid)),
));
break;
case MESA_SHADER_GEOMETRY:
crb.add(PC_GS_CNTL(CHIP,
.stride_in_vpc = COND(last, linkage.max_loc),
.psize = COND(last, VALIDREG(pointsize_regid)),
.layer = COND(last, VALIDREG(layer_regid)),
.view = COND(last, VALIDREG(view_regid)),
.primitive_id = primid,
.clip_mask = COND(last, clip_cull_mask),
.shadingrate = COND(last, VALIDREG(shading_rate_regid)),
));
break;
default:
break;
} }
} }
@ -1028,24 +1085,67 @@ tu6_emit_vpc(struct tu_cs *cs,
if (gs) if (gs)
assert(flags_regid != INVALID_REG); assert(flags_regid != INVALID_REG);
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1); switch (last_shader->type) {
tu_cs_emit(cs, A6XX_SP_VS_OUTPUT_CNTL_OUT(linkage.cnt) | case MESA_SHADER_VERTEX:
A6XX_SP_GS_OUTPUT_CNTL_FLAGS_REGID(flags_regid)); crb.add(A6XX_SP_VS_OUTPUT_CNTL(.out = linkage.cnt));
crb.add(VPC_VS_SIV_CNTL(CHIP,
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1); .layerloc = layer_loc,
tu_cs_emit(cs, A6XX_VPC_VS_SIV_CNTL_LAYERLOC(layer_loc) | .viewloc = view_loc,
A6XX_VPC_VS_SIV_CNTL_VIEWLOC(view_loc) | .shadingrateloc = shading_rate_loc,
A6XX_VPC_VS_SIV_CNTL_SHADINGRATELOC(shading_rate_loc)); ));
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl_v2, 1); if (CHIP <= A7XX) {
tu_cs_emit(cs, A6XX_VPC_VS_SIV_CNTL_LAYERLOC(layer_loc) | crb.add(VPC_VS_SIV_CNTL_V2(CHIP,
A6XX_VPC_VS_SIV_CNTL_VIEWLOC(view_loc) | .layerloc = layer_loc,
A6XX_VPC_VS_SIV_CNTL_SHADINGRATELOC(shading_rate_loc)); .viewloc = view_loc,
.shadingrateloc = shading_rate_loc,
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1); ));
tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_SU_VS_SIV_CNTL_WRITES_LAYER) | }
CONDREG(view_regid, A6XX_GRAS_SU_VS_SIV_CNTL_WRITES_VIEW)); crb.add(GRAS_SU_VS_SIV_CNTL(CHIP,
.writes_layer = VALIDREG(layer_regid),
tu6_emit_vpc_varying_modes<CHIP>(cs, fs, last_shader); .writes_view = VALIDREG(view_regid),
));
break;
case MESA_SHADER_TESS_EVAL:
crb.add(A6XX_SP_DS_OUTPUT_CNTL(.out = linkage.cnt));
crb.add(VPC_DS_SIV_CNTL(CHIP,
.layerloc = layer_loc,
.viewloc = view_loc,
.shadingrateloc = shading_rate_loc,
));
if (CHIP <= A7XX) {
crb.add(VPC_DS_SIV_CNTL_V2(CHIP,
.layerloc = layer_loc,
.viewloc = view_loc,
.shadingrateloc = shading_rate_loc,
));
}
crb.add(GRAS_SU_DS_SIV_CNTL(CHIP,
.writes_layer = VALIDREG(layer_regid),
.writes_view = VALIDREG(view_regid),
));
break;
case MESA_SHADER_GEOMETRY:
crb.add(A6XX_SP_GS_OUTPUT_CNTL(.out = linkage.cnt, .flags_regid = flags_regid));
crb.add(VPC_GS_SIV_CNTL(CHIP,
.layerloc = layer_loc,
.viewloc = view_loc,
.shadingrateloc = shading_rate_loc,
));
if (CHIP <= A7XX) {
crb.add(VPC_GS_SIV_CNTL_V2(CHIP,
.layerloc = layer_loc,
.viewloc = view_loc,
.shadingrateloc = shading_rate_loc,
));
}
crb.add(GRAS_SU_GS_SIV_CNTL(CHIP,
.writes_layer = VALIDREG(layer_regid),
.writes_view = VALIDREG(view_regid),
));
break;
default:
UNREACHABLE("bad last_shader type");
}
} }
TU_GENX(tu6_emit_vpc); TU_GENX(tu6_emit_vpc);
@ -1159,8 +1259,7 @@ tu6_emit_patch_control_points(struct tu_cs *cs,
patch_control_points * vs->variant->output_size / 4; patch_control_points * vs->variant->output_size / 4;
/* Total attribute slots in HS incoming patch. */ /* Total attribute slots in HS incoming patch. */
tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_PARAM_1, 1); tu_cs_emit_regs(cs, PC_HS_PARAM_1(CHIP, patch_local_mem_size_16b));
tu_cs_emit(cs, patch_local_mem_size_16b);
const uint32_t wavesize = 64; const uint32_t wavesize = 64;
const uint32_t vs_hs_local_mem_size = 16384; const uint32_t vs_hs_local_mem_size = 16384;
@ -1266,11 +1365,14 @@ tu6_emit_program_config(struct tu_cs *cs,
.ds_state = true, .gs_state = true, .ds_state = true, .gs_state = true,
.fs_state = true, .gfx_uav = true, .fs_state = true, .gfx_uav = true,
.gfx_shared_const = shared_consts_enable)); .gfx_shared_const = shared_consts_enable));
for (size_t stage_idx = MESA_SHADER_VERTEX;
stage_idx <= MESA_SHADER_FRAGMENT; stage_idx++) { const struct ir3_shader_variant *vs = variants[MESA_SHADER_VERTEX];
mesa_shader_stage stage = (mesa_shader_stage) stage_idx; const struct ir3_shader_variant *hs = variants[MESA_SHADER_TESS_CTRL];
tu6_emit_xs_config<CHIP>(crb, stage, variants[stage]); const struct ir3_shader_variant *ds = variants[MESA_SHADER_TESS_EVAL];
} const struct ir3_shader_variant *gs = variants[MESA_SHADER_GEOMETRY];
const struct ir3_shader_variant *fs = variants[MESA_SHADER_FRAGMENT];
tu6_emit_xs_config<CHIP>(crb, { .vs = vs, .hs = hs, .ds = ds, .gs = gs, .fs = fs });
crb.flush(); crb.flush();
@ -1280,11 +1382,6 @@ tu6_emit_program_config(struct tu_cs *cs,
tu6_emit_dynamic_offset(cs, variants[stage], shaders[stage], prog); tu6_emit_dynamic_offset(cs, variants[stage], shaders[stage], prog);
} }
const struct ir3_shader_variant *vs = variants[MESA_SHADER_VERTEX];
const struct ir3_shader_variant *hs = variants[MESA_SHADER_TESS_CTRL];
const struct ir3_shader_variant *ds = variants[MESA_SHADER_TESS_EVAL];
const struct ir3_shader_variant *gs = variants[MESA_SHADER_GEOMETRY];
if (hs) { if (hs) {
tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER); tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER);
tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER); tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER);
@ -1304,8 +1401,9 @@ tu6_emit_program_config(struct tu_cs *cs,
uint32_t vec4_size = gs->gs.vertices_in * uint32_t vec4_size = gs->gs.vertices_in *
DIV_ROUND_UP(prev_stage_output_size, 4); DIV_ROUND_UP(prev_stage_output_size, 4);
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); tu_cs_emit_regs(cs, PC_PRIMITIVE_CNTL_6(CHIP,
tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size)); .stride_in_vpc = vec4_size,
));
} }
uint32_t prim_size = prev_stage_output_size; uint32_t prim_size = prev_stage_output_size;
@ -2866,17 +2964,18 @@ void
tu6_emit_sample_locations(struct tu_cs *cs, bool enable, tu6_emit_sample_locations(struct tu_cs *cs, bool enable,
const struct vk_sample_locations_state *samp_loc) const struct vk_sample_locations_state *samp_loc)
{ {
uint32_t sample_config = tu_cs_emit_regs(cs, GRAS_SC_MSAA_SAMPLE_POS_CNTL(CHIP,
COND(enable, A6XX_RB_MSAA_SAMPLE_POS_CNTL_LOCATION_ENABLE); .location_enable = enable,
));
tu_cs_emit_regs(cs, A6XX_RB_MSAA_SAMPLE_POS_CNTL(
.location_enable = enable,
));
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_MSAA_SAMPLE_POS_CNTL, 1); if (CHIP <= A7XX) {
tu_cs_emit(cs, sample_config); tu_cs_emit_regs(cs, TPL1_MSAA_SAMPLE_POS_CNTL(CHIP,
.location_enable = enable,
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_SAMPLE_POS_CNTL, 1); ));
tu_cs_emit(cs, sample_config); }
tu_cs_emit_pkt4(cs, REG_A6XX_TPL1_MSAA_SAMPLE_POS_CNTL, 1);
tu_cs_emit(cs, sample_config);
if (!enable) if (!enable)
return; return;
@ -2903,14 +3002,21 @@ tu6_emit_sample_locations(struct tu_cs *cs, bool enable,
A6XX_RB_PROGRAMMABLE_MSAA_POS_0_SAMPLE_0_Y(y))) << i*8; A6XX_RB_PROGRAMMABLE_MSAA_POS_0_SAMPLE_0_Y(y))) << i*8;
} }
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_PROGRAMMABLE_MSAA_POS_0, 2); tu_cs_emit_regs(cs,
tu_cs_emit_qw(cs, sample_locations); GRAS_SC_PROGRAMMABLE_MSAA_POS_0(CHIP, .dword = sample_locations),
GRAS_SC_PROGRAMMABLE_MSAA_POS_1(CHIP, .dword = sample_locations >> 32),
);
tu_cs_emit_regs(cs,
A6XX_RB_PROGRAMMABLE_MSAA_POS_0(.dword = sample_locations),
A6XX_RB_PROGRAMMABLE_MSAA_POS_1(.dword = sample_locations >> 32),
);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_PROGRAMMABLE_MSAA_POS_0, 2); if (CHIP <= A7XX) {
tu_cs_emit_qw(cs, sample_locations); tu_cs_emit_regs(cs,
TPL1_PROGRAMMABLE_MSAA_POS_0(CHIP, .dword = sample_locations),
tu_cs_emit_pkt4(cs, REG_A6XX_TPL1_PROGRAMMABLE_MSAA_POS_0, 2); TPL1_PROGRAMMABLE_MSAA_POS_1(CHIP, .dword = sample_locations >> 32),
tu_cs_emit_qw(cs, sample_locations); );
}
} }
static const enum mesa_vk_dynamic_graphics_state tu_depth_bias_state[] = { static const enum mesa_vk_dynamic_graphics_state tu_depth_bias_state[] = {

View file

@@ -301,11 +301,14 @@ struct tu_pvtmem_config {
    bool per_wave;
 };
 
+struct tu_shader_stages {
+   const struct ir3_shader_variant *vs, *hs, *ds, *gs, *fs, *cs;
+};
+
 template <chip CHIP>
 void
 tu6_emit_xs_config(struct tu_crb &crb,
-                   mesa_shader_stage stage,
-                   const struct ir3_shader_variant *xs);
+                   struct tu_shader_stages stages);
 
 template <chip CHIP>
 void
View file

@@ -27,7 +27,19 @@
 #define NSEC_PER_SEC 1000000000ull
 #define WAIT_TIMEOUT 5
 
-#define STAT_COUNT ((REG_A6XX_RBBM_PIPESTAT_CSINVOCATIONS - REG_A6XX_RBBM_PIPESTAT_IAVERTICES) / 2 + 1)
+#define __COUNTER_REG(CHIP, name) __RBBM_PIPESTAT_ ## name <CHIP>({}).reg
+#define COUNTER_REG(name) __COUNTER_REG(CHIP, name)
+
+/* Note: gen8 changes the order of the pipestat regs, but in either case
+ * the ones we are interested in are consecutive, so for the purposes of
+ * knowing how many values to read we can just use A6XX reg addresses.
+ *
+ * And in both cases, RBBM_PIPESTAT_IAVERTICES is the first one.
+ *
+ * Depending on how/if they shuffle around in the future, we might need
+ * to shift to reading them individually, like gallium does.
+ */
+#define STAT_COUNT ((__COUNTER_REG(A6XX, CSINVOCATIONS) - __COUNTER_REG(A6XX, IAVERTICES)) / 2 + 1)
 
 struct PACKED query_slot {
    uint64_t available;
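
The STAT_COUNT arithmetic works because each RBBM_PIPESTAT_* counter is a 64-bit (two-dword) register laid out consecutively, so the address difference divided by two, plus one, gives the number of counters to read. A standalone sketch of the same token-pasting pattern with invented offsets (the real values come from the generated headers):

#include <cstdint>
#include <cstdio>

enum chip { A6XX, A7XX };

/* Invented offsets chosen so the result matches the 11 pipeline counters. */
template <chip CHIP> struct __RBBM_PIPESTAT_IAVERTICES    { uint16_t reg = 0x0540; };
template <chip CHIP> struct __RBBM_PIPESTAT_CSINVOCATIONS { uint16_t reg = 0x0554; };

#define __COUNTER_REG(CHIP, name) __RBBM_PIPESTAT_ ## name <CHIP>({}).reg
#define STAT_COUNT \
   ((__COUNTER_REG(A6XX, CSINVOCATIONS) - __COUNTER_REG(A6XX, IAVERTICES)) / 2 + 1)

int main()
{
   /* 11 consecutive 64-bit counters from IAVERTICES through CSINVOCATIONS. */
   printf("STAT_COUNT = %d\n", (int)STAT_COUNT);
   return 0;
}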
@ -463,35 +475,38 @@ get_result_count(struct tu_query_pool *pool)
} }
} }
template <chip CHIP>
static uint32_t static uint32_t
statistics_index(uint32_t *statistics) statistics_index(uint32_t *statistics)
{ {
uint32_t stat; uint32_t stat;
stat = u_bit_scan(statistics); stat = u_bit_scan(statistics);
#define COUNTER_OFFSET(name) ((COUNTER_REG(name) - COUNTER_REG(IAVERTICES)) / 2)
switch (1 << stat) { switch (1 << stat) {
case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT: case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT:
return 0; return COUNTER_OFFSET(IAVERTICES);
case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT: case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT:
return 1; return COUNTER_OFFSET(IAPRIMITIVES);
case VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT: case VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT:
return 2; return COUNTER_OFFSET(VSINVOCATIONS);
case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT: case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT:
return 5; return COUNTER_OFFSET(GSINVOCATIONS);
case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT: case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT:
return 6; return COUNTER_OFFSET(GSPRIMITIVES);
case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT: case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT:
return 7; return COUNTER_OFFSET(CINVOCATIONS);
case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT: case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT:
return 8; return COUNTER_OFFSET(CPRIMITIVES);
case VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT: case VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT:
return 9; return COUNTER_OFFSET(PSINVOCATIONS);
case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT: case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT:
return 3; return COUNTER_OFFSET(HSINVOCATIONS);
case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT: case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT:
return 4; return COUNTER_OFFSET(DSINVOCATIONS);
case VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT: case VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT:
return 10; return COUNTER_OFFSET(CSINVOCATIONS);
default: default:
return 0; return 0;
} }
@ -588,6 +603,7 @@ write_performance_query_value_cpu(char *base,
} }
} }
template <chip CHIP>
static VkResult static VkResult
get_query_pool_results(struct tu_device *device, get_query_pool_results(struct tu_device *device,
struct tu_query_pool *pool, struct tu_query_pool *pool,
@ -634,7 +650,7 @@ get_query_pool_results(struct tu_device *device,
uint64_t *result; uint64_t *result;
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
uint32_t stat_idx = statistics_index(&statistics); uint32_t stat_idx = statistics_index<CHIP>(&statistics);
result = query_result_addr(pool, query, uint64_t, stat_idx); result = query_result_addr(pool, query, uint64_t, stat_idx);
} else if (is_perf_query_raw(pool)) { } else if (is_perf_query_raw(pool)) {
result = query_result_addr(pool, query, struct perfcntr_query_slot, k); result = query_result_addr(pool, query, struct perfcntr_query_slot, k);
@ -703,6 +719,7 @@ get_query_pool_results(struct tu_device *device,
return result; return result;
} }
template <chip CHIP>
VKAPI_ATTR VkResult VKAPI_CALL VKAPI_ATTR VkResult VKAPI_CALL
tu_GetQueryPoolResults(VkDevice _device, tu_GetQueryPoolResults(VkDevice _device,
VkQueryPool queryPool, VkQueryPool queryPool,
@ -731,13 +748,14 @@ tu_GetQueryPoolResults(VkDevice _device,
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
return get_query_pool_results(device, pool, firstQuery, queryCount, return get_query_pool_results<CHIP>(device, pool, firstQuery, queryCount,
dataSize, pData, stride, flags); dataSize, pData, stride, flags);
default: default:
assert(!"Invalid query type"); assert(!"Invalid query type");
} }
return VK_SUCCESS; return VK_SUCCESS;
} }
TU_GENX(tu_GetQueryPoolResults);
/* Copies a query value from one buffer to another from the GPU. */ /* Copies a query value from one buffer to another from the GPU. */
static void static void
@ -808,7 +826,7 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
uint64_t result_iova; uint64_t result_iova;
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
uint32_t stat_idx = statistics_index(&statistics); uint32_t stat_idx = statistics_index<CHIP>(&statistics);
result_iova = query_result_iova(pool, query, uint64_t, stat_idx); result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
} else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { } else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
result_iova = query_result_iova(pool, query, result_iova = query_result_iova(pool, query,
@ -895,6 +913,7 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
} }
TU_GENX(tu_CmdCopyQueryPoolResults); TU_GENX(tu_CmdCopyQueryPoolResults);
template <chip CHIP>
static void static void
emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf, emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
struct tu_query_pool *pool, struct tu_query_pool *pool,
@ -915,7 +934,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
uint64_t result_iova; uint64_t result_iova;
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) { if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
uint32_t stat_idx = statistics_index(&statistics); uint32_t stat_idx = statistics_index<CHIP>(&statistics);
result_iova = query_result_iova(pool, query, uint64_t, stat_idx); result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
} else if (is_perf_query_raw(pool)) { } else if (is_perf_query_raw(pool)) {
result_iova = query_result_iova(pool, query, result_iova = query_result_iova(pool, query,
@ -949,6 +968,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
} }
template <chip CHIP>
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
tu_CmdResetQueryPool(VkCommandBuffer commandBuffer, tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
VkQueryPool queryPool, VkQueryPool queryPool,
@ -969,12 +989,13 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR: case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount); emit_reset_query_pool<CHIP>(cmdbuf, pool, firstQuery, queryCount);
break; break;
default: default:
assert(!"Invalid query type"); assert(!"Invalid query type");
} }
} }
TU_GENX(tu_CmdResetQueryPool);
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
tu_ResetQueryPool(VkDevice device, tu_ResetQueryPool(VkDevice device,
@ -1147,7 +1168,7 @@ emit_begin_stat_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_wfi(cs); tu_cs_emit_wfi(cs);
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) | tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) |
CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) | CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
CP_REG_TO_MEM_0_64B); CP_REG_TO_MEM_0_64B);
tu_cs_emit_qw(cs, begin_iova); tu_cs_emit_qw(cs, begin_iova);
@ -1365,7 +1386,7 @@ emit_begin_prim_generated_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_wfi(cs); tu_cs_emit_wfi(cs);
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_CINVOCATIONS) | tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(CINVOCATIONS)) |
CP_REG_TO_MEM_0_CNT(2) | CP_REG_TO_MEM_0_CNT(2) |
CP_REG_TO_MEM_0_64B); CP_REG_TO_MEM_0_64B);
tu_cs_emit_qw(cs, begin_iova); tu_cs_emit_qw(cs, begin_iova);
@ -1633,7 +1654,7 @@ emit_end_stat_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_wfi(cs); tu_cs_emit_wfi(cs);
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) | tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) |
CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) | CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
CP_REG_TO_MEM_0_64B); CP_REG_TO_MEM_0_64B);
tu_cs_emit_qw(cs, end_iova); tu_cs_emit_qw(cs, end_iova);
@ -1918,7 +1939,7 @@ emit_end_prim_generated_query(struct tu_cmd_buffer *cmdbuf,
tu_cs_emit_wfi(cs); tu_cs_emit_wfi(cs);
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_CINVOCATIONS) | tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(CINVOCATIONS)) |
CP_REG_TO_MEM_0_CNT(2) | CP_REG_TO_MEM_0_CNT(2) |
CP_REG_TO_MEM_0_64B); CP_REG_TO_MEM_0_64B);
tu_cs_emit_qw(cs, end_iova); tu_cs_emit_qw(cs, end_iova);
@ -2031,6 +2052,7 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
} }
TU_GENX(tu_CmdEndQueryIndexedEXT); TU_GENX(tu_CmdEndQueryIndexedEXT);
template <chip CHIP>
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
VkPipelineStageFlagBits2 pipelineStage, VkPipelineStageFlagBits2 pipelineStage,
@ -2067,7 +2089,7 @@ tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
} }
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_CP_ALWAYS_ON_COUNTER) | tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(__CP_ALWAYS_ON_COUNTER<CHIP>({}).reg) |
CP_REG_TO_MEM_0_CNT(2) | CP_REG_TO_MEM_0_CNT(2) |
CP_REG_TO_MEM_0_64B); CP_REG_TO_MEM_0_64B);
tu_cs_emit_qw(cs, query_result_iova(pool, query, uint64_t, 0)); tu_cs_emit_qw(cs, query_result_iova(pool, query, uint64_t, 0));
@ -2108,6 +2130,7 @@ tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
*/ */
handle_multiview_queries(cmd, pool, query); handle_multiview_queries(cmd, pool, query);
} }
TU_GENX(tu_CmdWriteTimestamp2);
VKAPI_ATTR void VKAPI_CALL VKAPI_ATTR void VKAPI_CALL
tu_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer, tu_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,

View file

@ -1781,7 +1781,7 @@ tu6_emit_cs_config(struct tu_cs *cs,
crb.add(SP_UPDATE_CNTL(CHIP, .cs_state = true, .cs_uav = true, crb.add(SP_UPDATE_CNTL(CHIP, .cs_state = true, .cs_uav = true,
.cs_shared_const = shared_consts_enable)); .cs_shared_const = shared_consts_enable));
tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_COMPUTE, v); tu6_emit_xs_config<CHIP>(crb, { .cs = v });
tu6_emit_xs(crb, cs->device, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova); tu6_emit_xs(crb, cs->device, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
} }
tu6_emit_xs_constants(cs, MESA_SHADER_COMPUTE, v, binary_iova); tu6_emit_xs_constants(cs, MESA_SHADER_COMPUTE, v, binary_iova);
@ -2031,50 +2031,54 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
need_size = true; need_size = true;
} }
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_INTERP_CNTL, 1); tu_cs_emit_regs(cs,
tu_cs_emit(cs, GRAS_CL_INTERP_CNTL(CHIP,
CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_GRAS_CL_INTERP_CNTL_IJ_PERSP_PIXEL) | .ij_persp_pixel = VALIDREG(ij_regid[IJ_PERSP_PIXEL]),
CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_GRAS_CL_INTERP_CNTL_IJ_PERSP_CENTROID) | .ij_persp_centroid = VALIDREG(ij_regid[IJ_PERSP_CENTROID]),
CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_GRAS_CL_INTERP_CNTL_IJ_PERSP_SAMPLE) | .ij_persp_sample = VALIDREG(ij_regid[IJ_PERSP_SAMPLE]),
CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_PIXEL) | .ij_linear_pixel = VALIDREG(ij_regid[IJ_LINEAR_PIXEL]) || need_size,
CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_CENTROID) | .ij_linear_centroid = VALIDREG(ij_regid[IJ_LINEAR_CENTROID]),
CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_SAMPLE) | .ij_linear_sample = VALIDREG(ij_regid[IJ_LINEAR_SAMPLE]) || need_size_persamp,
COND(need_size, A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_PIXEL) | .coord_mask = fs->fragcoord_compmask,
COND(need_size_persamp, A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_SAMPLE) | )
COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CL_INTERP_CNTL_COORD_MASK(fs->fragcoord_compmask))); );
tu_cs_emit_pkt4(cs, REG_A6XX_RB_INTERP_CNTL, 2); tu_cs_emit_regs(cs,
tu_cs_emit(cs, A6XX_RB_INTERP_CNTL(
CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_RB_INTERP_CNTL_IJ_PERSP_PIXEL) | .ij_persp_pixel = VALIDREG(ij_regid[IJ_PERSP_PIXEL]),
CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_RB_INTERP_CNTL_IJ_PERSP_CENTROID) | .ij_persp_centroid = VALIDREG(ij_regid[IJ_PERSP_CENTROID]),
CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_RB_INTERP_CNTL_IJ_PERSP_SAMPLE) | .ij_persp_sample = VALIDREG(ij_regid[IJ_PERSP_SAMPLE]),
CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_RB_INTERP_CNTL_IJ_LINEAR_PIXEL) | .ij_linear_pixel = VALIDREG(ij_regid[IJ_LINEAR_PIXEL]) || need_size,
CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_RB_INTERP_CNTL_IJ_LINEAR_CENTROID) | .ij_linear_centroid = VALIDREG(ij_regid[IJ_LINEAR_CENTROID]),
CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_RB_INTERP_CNTL_IJ_LINEAR_SAMPLE) | .ij_linear_sample = VALIDREG(ij_regid[IJ_LINEAR_SAMPLE]) || need_size_persamp,
COND(need_size, A6XX_RB_INTERP_CNTL_IJ_LINEAR_PIXEL) | .coord_mask = fs->fragcoord_compmask,
COND(enable_varyings, A6XX_RB_INTERP_CNTL_INTERP_EN) | .interp_en = enable_varyings,
COND(need_size_persamp, A6XX_RB_INTERP_CNTL_IJ_LINEAR_SAMPLE) | ),
COND(fs->fragcoord_compmask != 0, A6XX_RB_PS_INPUT_CNTL(
A6XX_RB_INTERP_CNTL_COORD_MASK(fs->fragcoord_compmask))); .samplemask = VALIDREG(smask_in_regid),
tu_cs_emit(cs, .postdepthcoverage = fs->post_depth_coverage,
A6XX_RB_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE( .faceness = fs->frag_face,
sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER) | .sampleid = VALIDREG(samp_id_regid),
CONDREG(smask_in_regid, A6XX_RB_PS_INPUT_CNTL_SAMPLEMASK) | .fragcoordsamplemode = sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER,
CONDREG(samp_id_regid, A6XX_RB_PS_INPUT_CNTL_SAMPLEID) | .centerrhw = VALIDREG(ij_regid[IJ_PERSP_CENTER_RHW]),
CONDREG(ij_regid[IJ_PERSP_CENTER_RHW], A6XX_RB_PS_INPUT_CNTL_CENTERRHW) | .foveation = VALIDREG(shading_rate_regid),
COND(fs->frag_face, A6XX_RB_PS_INPUT_CNTL_FACENESS) | ),
CONDREG(shading_rate_regid, A6XX_RB_PS_INPUT_CNTL_FOVEATION)); );
tu_cs_emit_pkt4(cs, REG_A6XX_RB_PS_SAMPLEFREQ_CNTL, 1); tu_cs_emit_regs(cs,
tu_cs_emit(cs, COND(sample_shading, A6XX_RB_PS_SAMPLEFREQ_CNTL_PER_SAMP_MODE)); A6XX_RB_PS_SAMPLEFREQ_CNTL(sample_shading)
);
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_INPUT_CNTL, 1); tu_cs_emit_regs(cs,
tu_cs_emit(cs, CONDREG(samp_id_regid, A6XX_GRAS_LRZ_PS_INPUT_CNTL_SAMPLEID) | GRAS_LRZ_PS_INPUT_CNTL(CHIP,
A6XX_GRAS_LRZ_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE( .sampleid = VALIDREG(samp_id_regid),
sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER)); .fragcoordsamplemode = sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER,
)
);
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL, 1); tu_cs_emit_regs(cs,
tu_cs_emit(cs, COND(sample_shading, A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL_PER_SAMP_MODE)); A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL(sample_shading)
);
uint32_t varmask[4] = { 0 }; uint32_t varmask[4] = { 0 };
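
The interpolation-control hunk above replaces hand-ORed CONDREG()/COND() masks with tu_cs_emit_regs() and named builder fields. The sketch below uses mock bit positions (not the generated a6xx headers) to show why the two forms encode the same dword.

    // Mock bit positions, loosely modeled on RB_INTERP_CNTL: demonstrates that
    // the named-field builder form packs the same dword as the old
    // CONDREG()/COND() OR chain.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    #define INVALID_REG   0xff
    #define VALIDREG(r)   ((r) != INVALID_REG)
    #define COND(c, mask) ((c) ? (mask) : 0)

    constexpr uint32_t IJ_PERSP_PIXEL  = 1u << 0;    // made-up bit positions
    constexpr uint32_t IJ_LINEAR_PIXEL = 1u << 3;
    constexpr uint32_t INTERP_EN       = 1u << 8;

    // Stand-in for a generated builder: named booleans packed into one dword.
    struct rb_interp_cntl_stub {
       bool ij_persp_pixel;
       bool ij_linear_pixel;
       bool interp_en;
       constexpr uint32_t dword() const {
          return COND(ij_persp_pixel, IJ_PERSP_PIXEL) |
                 COND(ij_linear_pixel, IJ_LINEAR_PIXEL) |
                 COND(interp_en, INTERP_EN);
       }
    };

    int main()
    {
       uint8_t ij_persp_pixel_regid = 0x04, ij_linear_pixel_regid = INVALID_REG;
       bool need_size = true, enable_varyings = true;

       // Old style: conditionally OR individual field masks.
       uint32_t old_style =
          COND(VALIDREG(ij_persp_pixel_regid), IJ_PERSP_PIXEL) |
          COND(VALIDREG(ij_linear_pixel_regid) || need_size, IJ_LINEAR_PIXEL) |
          COND(enable_varyings, INTERP_EN);

       // New style: one designated-initializer call site per register (C++20).
       uint32_t new_style = rb_interp_cntl_stub{
          .ij_persp_pixel  = VALIDREG(ij_persp_pixel_regid),
          .ij_linear_pixel = VALIDREG(ij_linear_pixel_regid) || need_size,
          .interp_en       = enable_varyings,
       }.dword();

       assert(old_style == new_style);
       printf("0x%03x == 0x%03x\n", (unsigned)old_style, (unsigned)new_style);
       return 0;
    }
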
@ -2200,11 +2204,11 @@ tu6_emit_vs(struct tu_cs *cs,
bool multi_pos_output = vs->multi_pos_output; bool multi_pos_output = vs->multi_pos_output;
uint32_t multiview_views = util_logbase2(view_mask) + 1; uint32_t multiview_views = util_logbase2(view_mask) + 1;
uint32_t multiview_cntl = view_mask ? struct fd_reg_pair multiview_cntl = PC_STEREO_RENDERING_CNTL(CHIP,
A6XX_PC_STEREO_RENDERING_CNTL_ENABLE | .enable = view_mask,
A6XX_PC_STEREO_RENDERING_CNTL_VIEWS(multiview_views) | .disablemultipos = !multi_pos_output,
COND(!multi_pos_output, A6XX_PC_STEREO_RENDERING_CNTL_DISABLEMULTIPOS) .views = multiview_views,
: 0; );
/* Copy what the blob does here. This will emit an extra 0x3f /* Copy what the blob does here. This will emit an extra 0x3f
* CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what
@ -2213,27 +2217,31 @@ tu6_emit_vs(struct tu_cs *cs,
if (cs->device->physical_device->info->props.has_cp_reg_write) { if (cs->device->physical_device->info->props.has_cp_reg_write) {
tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3); tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE)); tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE));
tu_cs_emit(cs, REG_A6XX_PC_STEREO_RENDERING_CNTL); tu_cs_emit(cs, multiview_cntl.reg);
} else { } else {
tu_cs_emit_pkt4(cs, REG_A6XX_PC_STEREO_RENDERING_CNTL, 1); tu_cs_emit_pkt4(cs, multiview_cntl.reg, 1);
} }
tu_cs_emit(cs, multiview_cntl); tu_cs_emit(cs, multiview_cntl.value);
tu_cs_emit_pkt4(cs, REG_A6XX_VFD_STEREO_RENDERING_CNTL, 1); tu_cs_emit_regs(cs, A6XX_VFD_STEREO_RENDERING_CNTL(
tu_cs_emit(cs, multiview_cntl); .enable = view_mask,
.disablemultipos = !multi_pos_output,
.views = multiview_views,
));
if (multiview_cntl && if (view_mask &&
cs->device->physical_device->info->props.supports_multiview_mask) { cs->device->physical_device->info->props.supports_multiview_mask) {
tu_cs_emit_pkt4(cs, REG_A6XX_PC_STEREO_RENDERING_VIEWMASK, 1); tu_cs_emit_regs(cs, PC_STEREO_RENDERING_VIEWMASK(CHIP, view_mask));
tu_cs_emit(cs, view_mask);
} }
if (CHIP >= A7XX) { if (CHIP >= A7XX) {
tu_cs_emit_pkt4(cs, REG_A7XX_VPC_STEREO_RENDERING_CNTL, 1); tu_cs_emit_regs(cs, VPC_STEREO_RENDERING_CNTL(CHIP,
tu_cs_emit(cs, multiview_cntl); .enable = view_mask,
.disablemultipos = !multi_pos_output,
.views = multiview_views,
));
tu_cs_emit_pkt4(cs, REG_A7XX_VPC_STEREO_RENDERING_VIEWMASK, 1); tu_cs_emit_regs(cs, VPC_STEREO_RENDERING_VIEWMASK(CHIP, view_mask));
tu_cs_emit(cs, view_mask);
} }
tu6_emit_vfd_dest(cs, vs); tu6_emit_vfd_dest(cs, vs);
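
In tu6_emit_vs the multiview control value is now built once as a struct fd_reg_pair, so both the CP_REG_WRITE path and the plain register-write path read the offset from .reg and the payload from .value. A small sketch with mock emit helpers (placeholder packet headers, made-up offset; the real code writes pm4 into a struct tu_cs) shows the shape of that pattern.

    // Mock emit helpers with placeholder packet headers: once the register is
    // built as a reg/value pair, the same pair feeds both emission paths
    // without repeating the offset constant.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct reg_pair_stub { uint32_t reg; uint64_t value; };   // simplified fd_reg_pair

    struct mock_cs { std::vector<uint32_t> dwords; };

    static void emit(mock_cs &cs, uint32_t dw) { cs.dwords.push_back(dw); }

    static void emit_reg_pair(mock_cs &cs, const reg_pair_stub &p, bool has_cp_reg_write)
    {
       if (has_cp_reg_write) {
          emit(cs, 0x70000000u);            // placeholder, not the real pkt7/tracker header
          emit(cs, p.reg);                  // the offset travels with the pair
       } else {
          emit(cs, 0x40000000u | p.reg);    // placeholder pkt4-style header naming the offset
       }
       emit(cs, (uint32_t)p.value);         // the payload comes from the same pair
    }

    int main()
    {
       mock_cs cs;
       // Made-up offset/value standing in for PC_STEREO_RENDERING_CNTL(CHIP, ...).
       reg_pair_stub multiview_cntl = { 0x9b00, 0x3 };
       emit_reg_pair(cs, multiview_cntl, /*has_cp_reg_write=*/true);
       printf("emitted %zu dwords\n", cs.dwords.size());
       return 0;
    }
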
@ -2276,8 +2284,7 @@ tu6_emit_hs(struct tu_cs *cs,
A6XX_VFD_CNTL_2_REGID_INVOCATIONID(hs_invocation_regid)); A6XX_VFD_CNTL_2_REGID_INVOCATIONID(hs_invocation_regid));
if (hs) { if (hs) {
tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_PARAM_0, 1); tu_cs_emit_regs(cs, PC_HS_PARAM_0(CHIP, hs->tess.tcs_vertices_out));
tu_cs_emit(cs, hs->tess.tcs_vertices_out);
} }
} }
TU_GENX(tu6_emit_hs); TU_GENX(tu6_emit_hs);
@ -2524,7 +2531,7 @@ tu_upload_shader(struct tu_device *dev,
size += TU6_EMIT_VFD_DEST_MAX_DWORDS; size += TU6_EMIT_VFD_DEST_MAX_DWORDS;
const unsigned xs_size = 128; const unsigned xs_size = 128;
const unsigned vpc_size = 32 + (v->stream_output.num_outputs != 0 ? 256 : 0); const unsigned vpc_size = 64 + (v->stream_output.num_outputs != 0 ? 256 : 0);
for (auto& variant : {v, binning, safe_const, safe_const_binning}) { for (auto& variant : {v, binning, safe_const, safe_const_binning}) {
if (variant) { if (variant) {

View file

@ -3,8 +3,6 @@
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
*/ */
#define FD_BO_NO_HARDPIN 1
#include "freedreno_batch.h" #include "freedreno_batch.h"
#include "fd6_barrier.h" #include "fd6_barrier.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_blend.h" #include "util/u_blend.h"
#include "util/u_dual_blend.h" #include "util/u_dual_blend.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "util/format_srgb.h" #include "util/format_srgb.h"
#include "util/half_float.h" #include "util/half_float.h"
#include "util/u_dump.h" #include "util/u_dump.h"

View file

@ -7,7 +7,6 @@
*/ */
#include "drm/freedreno_ringbuffer.h" #include "drm/freedreno_ringbuffer.h"
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_dump.h" #include "util/u_dump.h"

View file

@ -4,8 +4,6 @@
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
*/ */
#define FD_BO_NO_HARDPIN 1
#include "fd6_barrier.h" #include "fd6_barrier.h"
#include "fd6_const.h" #include "fd6_const.h"
#include "fd6_compute.h" #include "fd6_compute.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "freedreno_query_acc.h" #include "freedreno_query_acc.h"
#include "freedreno_state.h" #include "freedreno_state.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_prim.h" #include "util/u_prim.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "util/u_helpers.h" #include "util/u_helpers.h"

View file

@ -220,13 +220,17 @@ __event_write(fd_cs &cs, enum fd_gpu_event event,
fd_pkt7 pkt(cs, CP_EVENT_WRITE, len); fd_pkt7 pkt(cs, CP_EVENT_WRITE, len);
if (CHIP == A6XX) { if (CHIP == A6XX) {
pkt.add(CP_EVENT_WRITE_0_EVENT(info.raw_event) | pkt.add(CP_EVENT_WRITE_0(
COND(info.needs_seqno, CP_EVENT_WRITE_0_TIMESTAMP)); .event = info.raw_event,
.timestamp = info.needs_seqno,
));
} else if (CHIP >= A7XX) { } else if (CHIP >= A7XX) {
pkt.add(CP_EVENT_WRITE7_0_EVENT(info.raw_event) | pkt.add(CP_EVENT_WRITE7_0(
CP_EVENT_WRITE7_0_WRITE_SRC(esrc) | .event = info.raw_event,
CP_EVENT_WRITE7_0_WRITE_DST(edst) | .write_src = esrc,
COND(info.needs_seqno, CP_EVENT_WRITE7_0_WRITE_ENABLED)); .write_dst = edst,
.write_enabled = info.needs_seqno,
));
} }
if (info.needs_seqno) { if (info.needs_seqno) {
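
__event_write now fills the CP_EVENT_WRITE payload through struct-style builders and branches on CHIP for the A6XX versus A7XX layouts. The sketch below models that compile-time dispatch with made-up field packers; the real bit positions come from the generated pm4 headers.

    // Made-up field packers modeling the A6XX vs A7XX CP_EVENT_WRITE payload
    // split; none of the bit positions below are the real encodings.
    #include <cstdint>
    #include <cstdio>

    enum chip { A6XX, A7XX };

    static constexpr uint32_t event_write_a6xx(uint32_t event, bool timestamp)
    {
       return event | (timestamp ? 1u << 31 : 0u);                             // assumed layout
    }

    static constexpr uint32_t event_write_a7xx(uint32_t event, uint32_t src,
                                               uint32_t dst, bool enabled)
    {
       return event | (src << 8) | (dst << 12) | (enabled ? 1u << 24 : 0u);    // assumed layout
    }

    template <chip CHIP>
    uint32_t build_event_write(uint32_t raw_event, bool needs_seqno)
    {
       if constexpr (CHIP == A6XX) {
          // A6XX: just the event id plus a timestamp-request bit.
          return event_write_a6xx(raw_event, needs_seqno);
       } else {
          // A7XX and later: the payload also encodes where the value is read
          // from and where it gets written.
          return event_write_a7xx(raw_event, /*src=*/0, /*dst=*/0, needs_seqno);
       }
    }

    int main()
    {
       printf("a6xx payload: 0x%08x\n", (unsigned)build_event_write<A6XX>(0x1c, true));
       printf("a7xx payload: 0x%08x\n", (unsigned)build_event_write<A7XX>(0x1c, true));
       return 0;
    }
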

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include <stdio.h> #include <stdio.h>
#include "pipe/p_state.h" #include "pipe/p_state.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "freedreno_resource.h" #include "freedreno_resource.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include <initializer_list> #include <initializer_list>
#include "pipe/p_state.h" #include "pipe/p_state.h"
@ -884,9 +882,6 @@ emit_vpc(fd_crb &crb, const struct program_builder *b)
} }
} }
/* if vertex_flags somehow gets optimized out, your gonna have a bad time: */
assert(flags_regid != INVALID_REG);
switch (last_shader->type) { switch (last_shader->type) {
case MESA_SHADER_VERTEX: case MESA_SHADER_VERTEX:
crb.add(A6XX_SP_VS_OUTPUT_CNTL(.out = linkage.cnt)); crb.add(A6XX_SP_VS_OUTPUT_CNTL(.out = linkage.cnt));
@ -927,6 +922,9 @@ emit_vpc(fd_crb &crb, const struct program_builder *b)
)); ));
break; break;
case MESA_SHADER_GEOMETRY: case MESA_SHADER_GEOMETRY:
/* if vertex_flags somehow gets optimized out, you're gonna have a bad time: */
assert(flags_regid != INVALID_REG);
crb.add(A6XX_SP_GS_OUTPUT_CNTL(.out = linkage.cnt, .flags_regid = flags_regid)); crb.add(A6XX_SP_GS_OUTPUT_CNTL(.out = linkage.cnt, .flags_regid = flags_regid));
crb.add(VPC_GS_SIV_CNTL(CHIP, crb.add(VPC_GS_SIV_CNTL(CHIP,
.layerloc = layer_loc, .layerloc = layer_loc,

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
/* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */ /* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */
#include "freedreno_query_acc.h" #include "freedreno_query_acc.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h" #include "util/u_string.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "drm-uapi/drm_fourcc.h" #include "drm-uapi/drm_fourcc.h"
#include "a6xx/fd6_blitter.h" #include "a6xx/fd6_blitter.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "drm-uapi/drm_fourcc.h" #include "drm-uapi/drm_fourcc.h"
#include "pipe/p_screen.h" #include "pipe/p_screen.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/format/u_format.h" #include "util/format/u_format.h"
#include "util/hash_table.h" #include "util/hash_table.h"

View file

@ -3,8 +3,6 @@
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
*/ */
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "freedreno_batch.h" #include "freedreno_batch.h"

View file

@ -7,8 +7,6 @@
* Rob Clark <robclark@freedesktop.org> * Rob Clark <robclark@freedesktop.org>
*/ */
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h" #include "pipe/p_state.h"
#include "util/u_memory.h" #include "util/u_memory.h"
#include "util/u_string.h" #include "util/u_string.h"

View file

@ -235,6 +235,7 @@ freedreno_c_args += cc.get_supported_arguments([
freedreno_cpp_args = [] freedreno_cpp_args = []
freedreno_cpp_args += cpp.get_supported_arguments([ freedreno_cpp_args += cpp.get_supported_arguments([
'-DFD_BO_NO_HARDPIN=1',
'-fno-exceptions', '-fno-exceptions',
'-fno-rtti', '-fno-rtti',
'-Wno-address-of-packed-member', '-Wno-address-of-packed-member',