mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 05:10:11 +01:00
Merge branch 'tu/convert-deprecated-reg-builders' into 'main'
turnip: Convert deprecated reg builders. See merge request mesa/mesa!39029
This commit is contained in:
commit
38454dec8f
31 changed files with 579 additions and 440 deletions
|
|
@ -8,6 +8,8 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "util/macros.h"
|
||||
|
||||
#include "fd6_hw.h"
|
||||
|
||||
/* In order to debug issues with usage of stale reg data we need to have
|
||||
|
|
@ -26,6 +28,9 @@
|
|||
static inline bool
|
||||
fd_reg_stomp_allowed(chip CHIP, uint16_t reg)
|
||||
{
|
||||
PRAGMA_DIAGNOSTIC_PUSH
|
||||
PRAGMA_DIAGNOSTIC_IGNORED_CLANG(-W#pragma-messages)
|
||||
|
||||
switch (CHIP) {
|
||||
case A6XX: {
|
||||
switch (reg) {
|
||||
|
|
@ -77,6 +82,7 @@ fd_reg_stomp_allowed(chip CHIP, uint16_t reg)
|
|||
default: {
|
||||
UNREACHABLE("Unknown GPU");
|
||||
}
|
||||
PRAGMA_DIAGNOSTIC_POP
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -1095,7 +1095,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
|
|||
<bitfield name="DIRTY" pos="16" type="boolean"/>
|
||||
<bitfield name="DISABLE" pos="17" type="boolean"/>
|
||||
<bitfield name="DISABLE_ALL_GROUPS" pos="18" type="boolean"/>
|
||||
<bitfield name="LOAD_IMMED" pos="19" type="boolean"/>
|
||||
<bitfield name="LOAD_IMMED" pos="19" type="boolean" variants="A5XX"/>
|
||||
<bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/>
|
||||
<bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/>
|
||||
<bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/>
|
||||
|
|
|
|||
|
|
@ -121,6 +121,10 @@ def tab_to(name, value):
|
|||
tab_count = 1
|
||||
print(name + ('\t' * tab_count) + value)
|
||||
|
||||
def define_macro(name, value, has_variants):
    """Emit one definition line through tab_to().

    name         -- left-hand text of the definition, passed through unchanged.
    value        -- right-hand text of the definition.
    has_variants -- when truthy, "__FD_DEPRECATED " is prepended to value
                    before it is handed to tab_to(); otherwise value is
                    forwarded as-is.
    """
    tab_to(name, "__FD_DEPRECATED " + value if has_variants else value)
|
||||
|
||||
def mask(low, high):
    """Return an integer whose bits low..high (inclusive) are set.

    Intended for 0 <= low <= high with a field width of at most 64 bits.
    """
    all_ones_64 = 0xffffffffffffffff
    field_width = high + 1 - low
    # Trim the 64 set bits down to the field width, then slide the run of
    # ones up so that its lowest bit sits at position `low`.
    return ((all_ones_64 >> (64 - field_width)) << low)
|
||||
|
|
@ -258,11 +262,11 @@ class Bitset(object):
|
|||
# Requires using `fui()` or `_mesa_float_to_half()`
|
||||
constexpr_mark = ""
|
||||
if reg.bit_size == 64:
|
||||
tab_to(" uint64_t", "unknown;")
|
||||
tab_to(" uint64_t", "qword;")
|
||||
tab_to(" uint64_t", "unknown;")
|
||||
else:
|
||||
tab_to(" uint32_t", "unknown;")
|
||||
tab_to(" uint32_t", "dword;")
|
||||
tab_to(" uint32_t", "unknown;")
|
||||
print("};\n")
|
||||
|
||||
if not has_variants:
|
||||
|
|
@ -407,11 +411,13 @@ class Array(object):
|
|||
print("\t\tdefault: return INVALID_IDX(idx);")
|
||||
print("\t}\n}")
|
||||
if proto == '':
|
||||
tab_to("#define REG_%s_%s" %
|
||||
(self.domain, self.name), "0x%08x\n" % array_offset)
|
||||
define_macro("#define REG_%s_%s" %
|
||||
(self.domain, self.name), "0x%08x\n" % array_offset,
|
||||
has_variants)
|
||||
else:
|
||||
tab_to("#define REG_%s_%s(%s)" % (self.domain, self.name,
|
||||
proto), "(0x%08x + %s )\n" % (array_offset, strides))
|
||||
define_macro("#define REG_%s_%s(%s)" % (self.domain, self.name,
|
||||
proto), "(0x%08x + %s )\n" % (array_offset, strides),
|
||||
has_variants)
|
||||
|
||||
def dump_pack_struct(self, has_variants):
|
||||
pass
|
||||
|
|
@ -466,10 +472,13 @@ class Reg(object):
|
|||
strides = indices_strides(self.indices())
|
||||
offset = self.total_offset()
|
||||
if proto == '':
|
||||
tab_to("#define REG_%s" % self.full_name, "0x%08x" % offset)
|
||||
define_macro("#define REG_%s" % self.full_name, "0x%08x" % offset, has_variants)
|
||||
elif not has_variants:
|
||||
print("static CONSTEXPR inline uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (
|
||||
self.full_name, proto, offset, strides))
|
||||
depcrstr = ""
|
||||
if has_variants:
|
||||
depcrstr = " __FD_DEPRECATED "
|
||||
print("static CONSTEXPR inline%s uint32_t REG_%s(%s) { return 0x%08x + %s; }" % (
|
||||
depcrstr, self.full_name, proto, offset, strides))
|
||||
|
||||
if self.bitset.inline:
|
||||
self.bitset.dump(has_variants, self.full_name, self)
|
||||
|
|
@ -980,6 +989,15 @@ def dump_c(args, guard, func):
|
|||
print("#endif")
|
||||
print()
|
||||
|
||||
# TODO figure out what to do about fd_reg_stomp_allowed()
|
||||
# vs gcc.. for now only enable the warnings with clang:
|
||||
print("#if defined(__clang__) && !defined(FD_NO_DEPRECATED_PACK)")
|
||||
print("#define __FD_DEPRECATED _Pragma (\"GCC warning \\\"Deprecated reg builder\\\"\")")
|
||||
print("#else")
|
||||
print("#define __FD_DEPRECATED")
|
||||
print("#endif")
|
||||
print()
|
||||
|
||||
func(p)
|
||||
|
||||
print("#endif /* %s */" % guard)
|
||||
|
|
|
|||
|
|
@ -484,7 +484,7 @@ r2d_setup_common(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_A2D_PIXEL_CNTL, 1);
|
||||
tu_cs_emit(cs, unknown_8c01); // TODO: seem to be always 0 on A7XX
|
||||
|
||||
uint32_t blit_cntl = A6XX_RB_A2D_BLT_CNTL(
|
||||
tu_cs_emit_regs(cs, A6XX_RB_A2D_BLT_CNTL(
|
||||
.rotate = (enum a6xx_rotation) blit_param,
|
||||
.solid_color = clear,
|
||||
.color_format = fmt,
|
||||
|
|
@ -492,13 +492,17 @@ r2d_setup_common(struct tu_cmd_buffer *cmd,
|
|||
.d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
|
||||
.mask = 0xf,
|
||||
.ifmt = util_format_is_srgb(dst_format) ? R2D_UNORM8_SRGB : ifmt,
|
||||
).value;
|
||||
));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_A2D_BLT_CNTL, 1);
|
||||
tu_cs_emit(cs, blit_cntl);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_A2D_BLT_CNTL, 1);
|
||||
tu_cs_emit(cs, blit_cntl);
|
||||
tu_cs_emit_regs(cs, GRAS_A2D_BLT_CNTL(CHIP,
|
||||
.rotate = (enum a6xx_rotation) blit_param,
|
||||
.solid_color = clear,
|
||||
.color_format = fmt,
|
||||
.scissor = scissor,
|
||||
.d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
|
||||
.mask = 0xf,
|
||||
.ifmt = util_format_is_srgb(dst_format) ? R2D_UNORM8_SRGB : ifmt,
|
||||
));
|
||||
|
||||
if (CHIP > A6XX) {
|
||||
tu_cs_emit_regs(cs, TPL1_A2D_BLT_CNTL(CHIP, .raw_copy = false,
|
||||
|
|
@ -871,16 +875,12 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, enum r3d_type type,
|
|||
.cs_bindless = CHIP == A6XX ? 0x1f : 0xff,
|
||||
.gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,));
|
||||
|
||||
tu_crb crb = cs->crb(2 * 5 + 2 * 11);
|
||||
tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_VERTEX, vs);
|
||||
tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_TESS_CTRL, NULL);
|
||||
tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_TESS_EVAL, NULL);
|
||||
tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_GEOMETRY, NULL);
|
||||
tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_FRAGMENT, fs);
|
||||
with_crb (cs, 2 * 5 + 2 * 11) {
|
||||
tu6_emit_xs_config<CHIP>(crb, { .vs = vs, .fs = fs });
|
||||
struct tu_pvtmem_config pvtmem = {};
|
||||
tu6_emit_xs(crb, cs->device, MESA_SHADER_VERTEX, vs, &pvtmem, vs_iova);
|
||||
tu6_emit_xs(crb, cs->device, MESA_SHADER_FRAGMENT, fs, &pvtmem, fs_iova);
|
||||
crb.flush();
|
||||
}
|
||||
|
||||
tu6_emit_xs_constants(cs, MESA_SHADER_VERTEX, vs, vs_iova);
|
||||
tu6_emit_xs_constants(cs, MESA_SHADER_FRAGMENT, fs, fs_iova);
|
||||
|
|
@ -5311,12 +5311,12 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
|
|||
* save/restore them dynamically.
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1);
|
||||
tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A6XX_RB_CNTL) |
|
||||
tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(RB_CNTL(CHIP).reg) |
|
||||
CP_REG_TO_SCRATCH_0_SCRATCH(0) |
|
||||
CP_REG_TO_SCRATCH_0_CNT(1 - 1));
|
||||
if (CHIP >= A7XX) {
|
||||
tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1);
|
||||
tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A7XX_RB_BUFFER_CNTL) |
|
||||
tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(RB_BUFFER_CNTL(CHIP).reg) |
|
||||
CP_REG_TO_SCRATCH_0_SCRATCH(1) |
|
||||
CP_REG_TO_SCRATCH_0_CNT(1 - 1));
|
||||
}
|
||||
|
|
@ -5357,18 +5357,18 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
|
|||
|
||||
/* Restore RB_CNTL/GRAS_SC_BIN_CNTL saved above. */
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
|
||||
tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_RB_CNTL) |
|
||||
tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(RB_CNTL(CHIP).reg) |
|
||||
CP_SCRATCH_TO_REG_0_SCRATCH(0) |
|
||||
CP_SCRATCH_TO_REG_0_CNT(1 - 1));
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
|
||||
tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_GRAS_SC_BIN_CNTL) |
|
||||
tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(GRAS_SC_BIN_CNTL(CHIP).reg) |
|
||||
CP_SCRATCH_TO_REG_0_SCRATCH(0) |
|
||||
CP_SCRATCH_TO_REG_0_CNT(1 - 1));
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
|
||||
tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A7XX_RB_BUFFER_CNTL) |
|
||||
tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(RB_BUFFER_CNTL(CHIP).reg) |
|
||||
CP_SCRATCH_TO_REG_0_SCRATCH(1) |
|
||||
CP_SCRATCH_TO_REG_0_CNT(1 - 1));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -671,17 +671,32 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
|
|||
&cmd->state.pass->attachments[a];
|
||||
enum a6xx_depth_format fmt = tu6_pipe2depth(attachment->format);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
|
||||
tu_cs_emit(cs, RB_DEPTH_BUFFER_INFO(CHIP,
|
||||
unsigned depth_pitch, depth_array_pitch;
|
||||
uint64_t depth_base;
|
||||
|
||||
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
depth_pitch = iview->depth_pitch;
|
||||
depth_array_pitch = iview->depth_layer_size;
|
||||
depth_base = iview->depth_base_addr;
|
||||
} else {
|
||||
depth_pitch = iview->view.pitch;
|
||||
depth_array_pitch = iview->view.layer_size;
|
||||
depth_base = tu_layer_address(&iview->view, 0);
|
||||
}
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
RB_DEPTH_BUFFER_INFO(CHIP,
|
||||
.depth_format = fmt,
|
||||
.tilemode = TILE6_3,
|
||||
.losslesscompen = iview->view.ubwc_enabled,
|
||||
).value);
|
||||
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
|
||||
tu_cs_image_depth_ref(cs, iview, 0);
|
||||
else
|
||||
tu_cs_image_ref(cs, &iview->view, 0);
|
||||
tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0));
|
||||
),
|
||||
A6XX_RB_DEPTH_BUFFER_PITCH(depth_pitch),
|
||||
A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(depth_array_pitch),
|
||||
A6XX_RB_DEPTH_BUFFER_BASE(depth_base),
|
||||
A6XX_RB_DEPTH_GMEM_BASE(
|
||||
tu_attachment_gmem_offset(cmd, attachment, 0)
|
||||
),
|
||||
);
|
||||
|
||||
tu_cs_emit_regs(cs, GRAS_SU_DEPTH_BUFFER_INFO(CHIP, .depth_format = fmt));
|
||||
|
||||
|
|
@ -691,18 +706,31 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
|
|||
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
|
||||
attachment->format == VK_FORMAT_S8_UINT) {
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_BUFFER_INFO, 6);
|
||||
tu_cs_emit(cs, RB_STENCIL_BUFFER_INFO(CHIP,
|
||||
unsigned stencil_pitch, stencil_array_pitch, stencil_gmem_offset;
|
||||
uint64_t stencil_base;
|
||||
|
||||
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
stencil_pitch = iview->stencil_pitch;
|
||||
stencil_array_pitch = iview->stencil_layer_size;
|
||||
stencil_base = iview->stencil_base_addr;
|
||||
stencil_gmem_offset = tu_attachment_gmem_offset_stencil(cmd, attachment, 0);
|
||||
} else {
|
||||
stencil_pitch = iview->view.pitch;
|
||||
stencil_array_pitch = iview->view.layer_size;
|
||||
stencil_base = tu_layer_address(&iview->view, 0);
|
||||
stencil_gmem_offset = tu_attachment_gmem_offset(cmd, attachment, 0);
|
||||
}
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
RB_STENCIL_BUFFER_INFO(CHIP,
|
||||
.separate_stencil = true,
|
||||
.tilemode = TILE6_3,
|
||||
).value);
|
||||
if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
tu_cs_image_stencil_ref(cs, iview, 0);
|
||||
tu_cs_emit(cs, tu_attachment_gmem_offset_stencil(cmd, attachment, 0));
|
||||
} else {
|
||||
tu_cs_image_ref(cs, &iview->view, 0);
|
||||
tu_cs_emit(cs, tu_attachment_gmem_offset(cmd, attachment, 0));
|
||||
}
|
||||
),
|
||||
A6XX_RB_STENCIL_BUFFER_PITCH(stencil_pitch),
|
||||
A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(stencil_array_pitch),
|
||||
A6XX_RB_STENCIL_BUFFER_BASE(stencil_base),
|
||||
A6XX_RB_STENCIL_GMEM_BASE(stencil_gmem_offset),
|
||||
);
|
||||
} else {
|
||||
tu_cs_emit_regs(cs,
|
||||
RB_STENCIL_BUFFER_INFO(CHIP, 0));
|
||||
|
|
@ -898,7 +926,7 @@ tu6_emit_render_cntl<A6XX>(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
|
||||
if (no_track) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CNTL, 1);
|
||||
tu_cs_emit_pkt4(cs, RB_RENDER_CNTL(A6XX).reg, 1);
|
||||
tu_cs_emit(cs, cntl);
|
||||
return;
|
||||
}
|
||||
|
|
@ -917,7 +945,7 @@ tu6_emit_render_cntl<A6XX>(struct tu_cmd_buffer *cmd,
|
|||
|
||||
tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
|
||||
tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
|
||||
tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL);
|
||||
tu_cs_emit(cs, RB_RENDER_CNTL(A6XX).reg);
|
||||
tu_cs_emit(cs, cntl);
|
||||
}
|
||||
|
||||
|
|
@ -2033,13 +2061,13 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
|
|||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_NC_MODE_CNTL_2, 0);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_PERFCTR_SHADER_MASK, 0x3f);
|
||||
if (CHIP == A6XX && !cs->device->physical_device->info->props.is_a702)
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_TPL1_UNKNOWN_B605, 0x44);
|
||||
tu_cs_emit_regs(cs, TPL1_UNKNOWN_B605(CHIP, .dword = 0x44));
|
||||
if (CHIP == A6XX) {
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0);
|
||||
tu_cs_emit_regs(cs, HLSQ_UNKNOWN_BE00(CHIP, .dword = 0x80));
|
||||
tu_cs_emit_regs(cs, HLSQ_UNKNOWN_BE01(CHIP));
|
||||
}
|
||||
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_GFX_USIZE, 0); // 2 on a740 ???
|
||||
tu_cs_emit_regs(cs, SP_GFX_USIZE(CHIP));
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_TPL1_PS_ROTATION_CNTL, 0);
|
||||
if (CHIP == A6XX)
|
||||
tu_cs_emit_regs(cs, HLSQ_SHARED_CONSTS(CHIP, .enable = false));
|
||||
|
|
@ -2062,9 +2090,10 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
|
|||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
|
||||
|
||||
tu_cs_emit_regs(cs, RB_UNKNOWN_88F0(CHIP));
|
||||
}
|
||||
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);
|
||||
|
||||
tu_cs_emit_regs(cs, VPC_REPLACE_MODE_CNTL(CHIP, false));
|
||||
tu_cs_emit_regs(cs, VPC_ROTATION_CNTL(CHIP));
|
||||
|
|
@ -2078,10 +2107,10 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
|
|||
tu_cs_emit_regs(cs, GRAS_SU_CONSERVATIVE_RAS_CNTL(CHIP, 0));
|
||||
tu_cs_emit_regs(cs, PC_DGEN_SU_CONSERVATIVE_RAS_CNTL(CHIP));
|
||||
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0);
|
||||
tu_cs_emit_regs(cs, VPC_UNKNOWN_9210(CHIP));
|
||||
tu_cs_emit_regs(cs, VPC_UNKNOWN_9211(CHIP));
|
||||
}
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_LB_MODE_CNTL, 0);
|
||||
tu_cs_emit_regs(cs, VPC_LB_MODE_CNTL(CHIP));
|
||||
tu_cs_emit_regs(cs, PC_CONTEXT_SWITCH_GFX_PREEMPTION_MODE(CHIP));
|
||||
tu_cs_emit_regs(cs, A6XX_TPL1_MODE_CNTL(.isammode = ISAMMODE_GL,
|
||||
.texcoordroundmode = dev->instance->use_tex_coord_round_nearest_even_mode
|
||||
|
|
@ -2152,9 +2181,10 @@ tu6_init_static_regs(struct tu_device *dev, struct tu_cs *cs)
|
|||
* zero-instance draw calls. See IR3_CONST_ALLOC_DRIVER_PARAMS allocation
|
||||
* for more info.
|
||||
*/
|
||||
tu_cs_emit_pkt4(
|
||||
cs, CHIP == A6XX ? REG_A6XX_SP_VS_CONST_CONFIG : REG_A7XX_SP_VS_CONST_CONFIG, 1);
|
||||
tu_cs_emit(cs, A6XX_SP_VS_CONST_CONFIG_CONSTLEN(8) | A6XX_SP_VS_CONST_CONFIG_ENABLED);
|
||||
tu_cs_emit_regs(cs, SP_VS_CONST_CONFIG(CHIP,
|
||||
.constlen = 8,
|
||||
.enabled = true,
|
||||
));
|
||||
}
|
||||
|
||||
/* Emit the bin restore preamble, which runs in between bins when L1
|
||||
|
|
@ -9096,7 +9126,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
* previous dispatches to finish.
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
|
||||
tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_SP_CS_NDRANGE_1));
|
||||
tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(SP_CS_NDRANGE_1(CHIP).reg));
|
||||
tu_cs_emit_qw(cs, info->indirect);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2);
|
||||
|
|
@ -9121,7 +9151,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
CP_REG_RMW_0_SKIP_WAIT_FOR_ME |
|
||||
CP_REG_RMW_0_SRC0_IS_REG |
|
||||
CP_REG_RMW_0_SRC1_ADD);
|
||||
tu_cs_emit(cs, REG_A7XX_SP_CS_NDRANGE_1); /* SRC0 */
|
||||
tu_cs_emit(cs, SP_CS_NDRANGE_1(CHIP).reg); /* SRC0 */
|
||||
tu_cs_emit(cs, -1); /* SRC1 */
|
||||
|
||||
/* scratch0 = ((scratch0 & (local_size - 1)) rot 2
|
||||
|
|
@ -9139,7 +9169,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
/* write scratch0 to SP_CS_NDRANGE_7 */
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
|
||||
tu_cs_emit(cs,
|
||||
CP_SCRATCH_TO_REG_0_REG(REG_A7XX_SP_CS_NDRANGE_7) |
|
||||
CP_SCRATCH_TO_REG_0_REG(SP_CS_NDRANGE_7(CHIP).reg) |
|
||||
CP_SCRATCH_TO_REG_0_SCRATCH(0));
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_WRITE, 2);
|
||||
|
|
@ -9157,7 +9187,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
CP_REG_RMW_0_SKIP_WAIT_FOR_ME |
|
||||
CP_REG_RMW_0_SRC0_IS_REG |
|
||||
CP_REG_RMW_0_SRC1_ADD);
|
||||
tu_cs_emit(cs, REG_A7XX_SP_CS_NDRANGE_1); /* SRC0 */
|
||||
tu_cs_emit(cs, SP_CS_NDRANGE_1(CHIP).reg); /* SRC0 */
|
||||
tu_cs_emit(cs, local_size[0] - 1); /* SRC1 */
|
||||
|
||||
unsigned local_size_log2 = util_logbase2(local_size[0]);
|
||||
|
|
@ -9179,7 +9209,7 @@ tu_dispatch(struct tu_cmd_buffer *cmd,
|
|||
/* write scratch0 to SP_CS_KERNEL_GROUP_X */
|
||||
tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
|
||||
tu_cs_emit(cs,
|
||||
CP_SCRATCH_TO_REG_0_REG(REG_A7XX_SP_CS_KERNEL_GROUP_X) |
|
||||
CP_SCRATCH_TO_REG_0_REG(SP_CS_KERNEL_GROUP_X(CHIP).reg) |
|
||||
CP_SCRATCH_TO_REG_0_SCRATCH(0));
|
||||
} else {
|
||||
tu_cs_emit_regs(cs,
|
||||
|
|
|
|||
|
|
@ -147,30 +147,6 @@ tu_layer_flag_address(const struct fdl6_view *iview, uint32_t layer)
|
|||
return iview->ubwc_addr + iview->ubwc_layer_size * layer;
|
||||
}
|
||||
|
||||
void
|
||||
tu_cs_image_ref(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer)
|
||||
{
|
||||
tu_cs_emit(cs, A6XX_RB_MRT_PITCH(0, iview->pitch).value);
|
||||
tu_cs_emit(cs, iview->layer_size >> 6);
|
||||
tu_cs_emit_qw(cs, tu_layer_address(iview, layer));
|
||||
}
|
||||
|
||||
void
|
||||
tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
||||
{
|
||||
tu_cs_emit(cs, A6XX_RB_STENCIL_BUFFER_PITCH(iview->stencil_pitch).value);
|
||||
tu_cs_emit(cs, iview->stencil_layer_size >> 6);
|
||||
tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
|
||||
}
|
||||
|
||||
void
|
||||
tu_cs_image_depth_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
|
||||
{
|
||||
tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(iview->depth_pitch).value);
|
||||
tu_cs_emit(cs, iview->depth_layer_size >> 6);
|
||||
tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer);
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu_cs_image_ref_2d(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, bool src)
|
||||
|
|
|
|||
|
|
@ -732,14 +732,14 @@ tu_lrz_before_sysmem_br(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_emit(cs, if_dwords + 1);
|
||||
/* GRAS_LRZ_DEPTH_CLEAR = lrz_fc->buffer[1].depth_clear_val */
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
|
||||
tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_GRAS_LRZ_DEPTH_CLEAR));
|
||||
tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(GRAS_LRZ_DEPTH_CLEAR(CHIP).reg));
|
||||
tu_cs_emit_qw(cs, lrz_fc_iova + offsetof(fd_lrzfc_layout<A7XX>,
|
||||
buffer[1].depth_clear_val));
|
||||
/* } else { */
|
||||
tu_cs_emit_pkt7(cs, CP_NOP, else_dwords);
|
||||
/* GRAS_LRZ_DEPTH_CLEAR = lrz_fc->buffer[0].depth_clear_val */
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
|
||||
tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(REG_A7XX_GRAS_LRZ_DEPTH_CLEAR));
|
||||
tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(GRAS_LRZ_DEPTH_CLEAR(CHIP).reg));
|
||||
tu_cs_emit_qw(cs, lrz_fc_iova + offsetof(fd_lrzfc_layout<A7XX>,
|
||||
buffer[0].depth_clear_val));
|
||||
/* } */
|
||||
|
|
|
|||
|
|
@ -338,71 +338,72 @@ tu_push_consts_type(const struct tu_pipeline_layout *layout,
|
|||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
struct xs_config {
|
||||
uint16_t reg_sp_xs_config;
|
||||
uint16_t reg_hlsq_xs_ctrl;
|
||||
};
|
||||
static uint32_t
|
||||
sp_xs_config(const struct ir3_shader_variant *v)
|
||||
{
|
||||
if (!v)
|
||||
return 0;
|
||||
|
||||
template <chip CHIP>
|
||||
static const xs_config<CHIP> xs_configs[] = {
|
||||
[MESA_SHADER_VERTEX] = {
|
||||
REG_A6XX_SP_VS_CONFIG,
|
||||
CHIP == A6XX ? REG_A6XX_SP_VS_CONST_CONFIG : REG_A7XX_SP_VS_CONST_CONFIG,
|
||||
},
|
||||
[MESA_SHADER_TESS_CTRL] = {
|
||||
REG_A6XX_SP_HS_CONFIG,
|
||||
CHIP == A6XX ? REG_A6XX_SP_HS_CONST_CONFIG : REG_A7XX_SP_HS_CONST_CONFIG,
|
||||
},
|
||||
[MESA_SHADER_TESS_EVAL] = {
|
||||
REG_A6XX_SP_DS_CONFIG,
|
||||
CHIP == A6XX ? REG_A6XX_SP_DS_CONST_CONFIG : REG_A7XX_SP_DS_CONST_CONFIG,
|
||||
},
|
||||
[MESA_SHADER_GEOMETRY] = {
|
||||
REG_A6XX_SP_GS_CONFIG,
|
||||
CHIP == A6XX ? REG_A6XX_SP_GS_CONST_CONFIG : REG_A7XX_SP_GS_CONST_CONFIG,
|
||||
},
|
||||
[MESA_SHADER_FRAGMENT] = {
|
||||
REG_A6XX_SP_PS_CONFIG,
|
||||
CHIP == A6XX ? REG_A6XX_SP_PS_CONST_CONFIG : REG_A7XX_SP_PS_CONST_CONFIG,
|
||||
},
|
||||
[MESA_SHADER_COMPUTE] = {
|
||||
REG_A6XX_SP_CS_CONFIG,
|
||||
CHIP == A6XX ? REG_A6XX_SP_CS_CONST_CONFIG : REG_A7XX_SP_CS_CONST_CONFIG,
|
||||
},
|
||||
};
|
||||
return A6XX_SP_VS_CONFIG_ENABLED |
|
||||
COND(v->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
|
||||
COND(v->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
|
||||
COND(v->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_UAV) |
|
||||
COND(v->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
|
||||
A6XX_SP_VS_CONFIG_NUAV(ir3_shader_num_uavs(v)) |
|
||||
A6XX_SP_VS_CONFIG_NTEX(v->num_samp) |
|
||||
A6XX_SP_VS_CONFIG_NSAMP(v->num_samp);
|
||||
}
|
||||
|
||||
static bool
|
||||
push_shared_consts(const struct ir3_shader_variant *v)
|
||||
{
|
||||
return v && v->shader_options.push_consts_type == IR3_PUSH_CONSTS_SHARED_PREAMBLE;
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu6_emit_xs_config(struct tu_crb &crb,
|
||||
mesa_shader_stage stage, /* xs->type, but xs may be NULL */
|
||||
const struct ir3_shader_variant *xs)
|
||||
tu6_emit_xs_config(struct tu_crb &crb, struct tu_shader_stages stages)
|
||||
{
|
||||
const struct xs_config<CHIP> *cfg = &xs_configs<CHIP>[stage];
|
||||
if (stages.cs) {
|
||||
crb.add(SP_CS_CONST_CONFIG(CHIP,
|
||||
.constlen = stages.cs->constlen,
|
||||
.enabled = true,
|
||||
.read_imm_shared_consts = push_shared_consts(stages.cs),
|
||||
));
|
||||
crb.add(A6XX_SP_CS_CONFIG(.dword = sp_xs_config(stages.cs)));
|
||||
} else {
|
||||
crb.add(SP_VS_CONST_CONFIG(CHIP,
|
||||
.constlen = COND(stages.vs, stages.vs->constlen),
|
||||
.enabled = stages.vs,
|
||||
.read_imm_shared_consts = push_shared_consts(stages.vs),
|
||||
));
|
||||
crb.add(SP_HS_CONST_CONFIG(CHIP,
|
||||
.constlen = COND(stages.hs, stages.hs->constlen),
|
||||
.enabled = stages.hs,
|
||||
.read_imm_shared_consts = push_shared_consts(stages.hs),
|
||||
));
|
||||
crb.add(SP_DS_CONST_CONFIG(CHIP,
|
||||
.constlen = COND(stages.ds, stages.ds->constlen),
|
||||
.enabled = stages.ds,
|
||||
.read_imm_shared_consts = push_shared_consts(stages.ds),
|
||||
));
|
||||
crb.add(SP_GS_CONST_CONFIG(CHIP,
|
||||
.constlen = COND(stages.gs, stages.gs->constlen),
|
||||
.enabled = stages.gs,
|
||||
.read_imm_shared_consts = push_shared_consts(stages.gs),
|
||||
));
|
||||
crb.add(SP_PS_CONST_CONFIG(CHIP,
|
||||
.constlen = COND(stages.fs, stages.fs->constlen),
|
||||
.enabled = stages.fs,
|
||||
.read_imm_shared_consts = push_shared_consts(stages.fs),
|
||||
));
|
||||
|
||||
if (!xs) {
|
||||
/* shader stage disabled */
|
||||
crb.add(tu_reg_value { .reg = cfg->reg_sp_xs_config, .value = 0 });
|
||||
crb.add(tu_reg_value { .reg = cfg->reg_hlsq_xs_ctrl, .value = 0 });
|
||||
return;
|
||||
crb.add(A6XX_SP_VS_CONFIG(.dword = sp_xs_config(stages.vs)));
|
||||
crb.add(A6XX_SP_HS_CONFIG(.dword = sp_xs_config(stages.hs)));
|
||||
crb.add(A6XX_SP_DS_CONFIG(.dword = sp_xs_config(stages.ds)));
|
||||
crb.add(A6XX_SP_GS_CONFIG(.dword = sp_xs_config(stages.gs)));
|
||||
crb.add(A6XX_SP_PS_CONFIG(.dword = sp_xs_config(stages.fs)));
|
||||
}
|
||||
|
||||
crb.add(tu_reg_value {
|
||||
.reg = cfg->reg_sp_xs_config,
|
||||
.value = A6XX_SP_VS_CONFIG_ENABLED |
|
||||
COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
|
||||
COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
|
||||
COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_UAV) |
|
||||
COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
|
||||
A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) |
|
||||
A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp) });
|
||||
crb.add(tu_reg_value {
|
||||
.reg = cfg->reg_hlsq_xs_ctrl,
|
||||
.value = A6XX_SP_VS_CONST_CONFIG_CONSTLEN(xs->constlen) |
|
||||
A6XX_SP_VS_CONST_CONFIG_ENABLED |
|
||||
COND(xs->shader_options.push_consts_type ==
|
||||
IR3_PUSH_CONSTS_SHARED_PREAMBLE,
|
||||
A7XX_SP_VS_CONST_CONFIG_READ_IMM_SHARED_CONSTS) });
|
||||
}
|
||||
TU_GENX(tu6_emit_xs_config);
|
||||
|
||||
|
|
@ -782,73 +783,6 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
const struct ir3_shader_variant *gs,
|
||||
const struct ir3_shader_variant *fs)
|
||||
{
|
||||
/* note: doesn't compile as static because of the array regs.. */
|
||||
const struct reg_config {
|
||||
uint16_t reg_sp_xs_out_reg;
|
||||
uint16_t reg_sp_xs_vpc_dst_reg;
|
||||
uint16_t reg_vpc_xs_pack;
|
||||
uint16_t reg_vpc_xs_clip_cntl;
|
||||
uint16_t reg_vpc_xs_clip_cntl_v2;
|
||||
uint16_t reg_gras_xs_cl_cntl;
|
||||
uint16_t reg_pc_xs_out_cntl;
|
||||
uint16_t reg_sp_xs_primitive_cntl;
|
||||
uint16_t reg_vpc_xs_layer_cntl;
|
||||
uint16_t reg_vpc_xs_layer_cntl_v2;
|
||||
uint16_t reg_gras_xs_layer_cntl;
|
||||
} reg_config[] = {
|
||||
[MESA_SHADER_VERTEX] = {
|
||||
REG_A6XX_SP_VS_OUTPUT_REG(0),
|
||||
REG_A6XX_SP_VS_VPC_DEST_REG(0),
|
||||
REG_A6XX_VPC_VS_CNTL,
|
||||
REG_A6XX_VPC_VS_CLIP_CULL_CNTL,
|
||||
REG_A6XX_VPC_VS_CLIP_CULL_CNTL_V2,
|
||||
REG_A6XX_GRAS_CL_VS_CLIP_CULL_DISTANCE,
|
||||
REG_A6XX_PC_VS_CNTL,
|
||||
REG_A6XX_SP_VS_OUTPUT_CNTL,
|
||||
REG_A6XX_VPC_VS_SIV_CNTL,
|
||||
REG_A6XX_VPC_VS_SIV_CNTL_V2,
|
||||
REG_A6XX_GRAS_SU_VS_SIV_CNTL,
|
||||
},
|
||||
[MESA_SHADER_TESS_CTRL] = {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
REG_A6XX_PC_HS_CNTL,
|
||||
0,
|
||||
0,
|
||||
0
|
||||
},
|
||||
[MESA_SHADER_TESS_EVAL] = {
|
||||
REG_A6XX_SP_DS_OUTPUT_REG(0),
|
||||
REG_A6XX_SP_DS_VPC_DEST_REG(0),
|
||||
REG_A6XX_VPC_DS_CNTL,
|
||||
REG_A6XX_VPC_DS_CLIP_CULL_CNTL,
|
||||
REG_A6XX_VPC_DS_CLIP_CULL_CNTL_V2,
|
||||
REG_A6XX_GRAS_CL_DS_CLIP_CULL_DISTANCE,
|
||||
REG_A6XX_PC_DS_CNTL,
|
||||
REG_A6XX_SP_DS_OUTPUT_CNTL,
|
||||
REG_A6XX_VPC_DS_SIV_CNTL,
|
||||
REG_A6XX_VPC_DS_SIV_CNTL_V2,
|
||||
REG_A6XX_GRAS_SU_DS_SIV_CNTL,
|
||||
},
|
||||
[MESA_SHADER_GEOMETRY] = {
|
||||
REG_A6XX_SP_GS_OUTPUT_REG(0),
|
||||
REG_A6XX_SP_GS_VPC_DEST_REG(0),
|
||||
REG_A6XX_VPC_GS_CNTL,
|
||||
REG_A6XX_VPC_GS_CLIP_CULL_CNTL,
|
||||
REG_A6XX_VPC_GS_CLIP_CULL_CNTL_V2,
|
||||
REG_A6XX_GRAS_CL_GS_CLIP_CULL_DISTANCE,
|
||||
REG_A6XX_PC_GS_CNTL,
|
||||
REG_A6XX_SP_GS_OUTPUT_CNTL,
|
||||
REG_A6XX_VPC_GS_SIV_CNTL,
|
||||
REG_A6XX_VPC_GS_SIV_CNTL_V2,
|
||||
REG_A6XX_GRAS_SU_GS_SIV_CNTL,
|
||||
},
|
||||
};
|
||||
|
||||
const struct ir3_shader_variant *last_shader;
|
||||
if (gs) {
|
||||
last_shader = gs;
|
||||
|
|
@ -858,8 +792,6 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
last_shader = vs;
|
||||
}
|
||||
|
||||
const struct reg_config *cfg = ®_config[last_shader->type];
|
||||
|
||||
struct ir3_shader_linkage linkage = {
|
||||
.primid_loc = 0xff,
|
||||
.clip0_loc = 0xff,
|
||||
|
|
@ -961,6 +893,8 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
if (linkage.cnt == 0)
|
||||
ir3_link_add(&linkage, 0, 0, 0x1, linkage.max_loc);
|
||||
|
||||
tu6_emit_vpc_varying_modes<CHIP>(cs, fs, last_shader);
|
||||
|
||||
/* map outputs of the last shader to VPC */
|
||||
assert(linkage.cnt <= 32);
|
||||
const uint32_t sp_out_count = DIV_ROUND_UP(linkage.cnt, 2);
|
||||
|
|
@ -975,30 +909,121 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
A6XX_SP_VS_VPC_DEST_REG_OUTLOC0(linkage.var[i].loc);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count);
|
||||
tu_cs_emit_array(cs, sp_out, sp_out_count);
|
||||
tu_crb crb = cs->crb(sp_out_count + sp_vpc_dst_count + 12);
|
||||
uint32_t *regs;
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count);
|
||||
tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count);
|
||||
switch (last_shader->type) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
regs = (uint32_t *)sp_out;
|
||||
for (unsigned i = 0; i < sp_out_count; i++)
|
||||
crb.add(A6XX_SP_VS_OUTPUT_REG(i, .dword = regs[i]));
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1);
|
||||
tu_cs_emit(cs, A6XX_VPC_VS_CNTL_POSITIONLOC(position_loc) |
|
||||
A6XX_VPC_VS_CNTL_PSIZELOC(pointsize_loc) |
|
||||
A6XX_VPC_VS_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
|
||||
A6XX_VPC_VS_CNTL_EXTRAPOS(extra_pos));
|
||||
regs = (uint32_t *)sp_vpc_dst;
|
||||
for (unsigned i = 0; i < sp_vpc_dst_count; i++)
|
||||
crb.add(A6XX_SP_VS_VPC_DEST_REG(i, .dword = regs[i]));
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1);
|
||||
tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_MASK(clip_cull_mask) |
|
||||
A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
|
||||
A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_47_LOC(clip1_loc));
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl_v2, 1);
|
||||
tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_MASK(clip_cull_mask) |
|
||||
A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
|
||||
A6XX_VPC_VS_CLIP_CULL_CNTL_CLIP_DIST_47_LOC(clip1_loc));
|
||||
crb.add(VPC_VS_CNTL(CHIP,
|
||||
.stride_in_vpc = linkage.max_loc,
|
||||
.positionloc = position_loc,
|
||||
.psizeloc = pointsize_loc,
|
||||
.extrapos = extra_pos,
|
||||
));
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1);
|
||||
tu_cs_emit(cs, A6XX_GRAS_CL_VS_CLIP_CULL_DISTANCE_CLIP_MASK(last_shader->clip_mask) |
|
||||
A6XX_GRAS_CL_VS_CLIP_CULL_DISTANCE_CULL_MASK(last_shader->cull_mask));
|
||||
crb.add(VPC_VS_CLIP_CULL_CNTL(CHIP,
|
||||
.clip_mask = clip_cull_mask,
|
||||
.clip_dist_03_loc = clip0_loc,
|
||||
.clip_dist_47_loc = clip1_loc,
|
||||
));
|
||||
|
||||
if (CHIP <= A7XX) {
|
||||
crb.add(VPC_VS_CLIP_CULL_CNTL_V2(CHIP,
|
||||
.clip_mask = clip_cull_mask,
|
||||
.clip_dist_03_loc = clip0_loc,
|
||||
.clip_dist_47_loc = clip1_loc,
|
||||
));
|
||||
}
|
||||
|
||||
crb.add(GRAS_CL_VS_CLIP_CULL_DISTANCE(CHIP,
|
||||
.clip_mask = last_shader->clip_mask,
|
||||
.cull_mask = last_shader->cull_mask,
|
||||
));
|
||||
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
regs = (uint32_t *)sp_out;
|
||||
for (unsigned i = 0; i < sp_out_count; i++)
|
||||
crb.add(A6XX_SP_DS_OUTPUT_REG(i, .dword = regs[i]));
|
||||
|
||||
regs = (uint32_t *)sp_vpc_dst;
|
||||
for (unsigned i = 0; i < sp_vpc_dst_count; i++)
|
||||
crb.add(A6XX_SP_DS_VPC_DEST_REG(i, .dword = regs[i]));
|
||||
|
||||
crb.add(VPC_DS_CNTL(CHIP,
|
||||
.stride_in_vpc = linkage.max_loc,
|
||||
.positionloc = position_loc,
|
||||
.psizeloc = pointsize_loc,
|
||||
.extrapos = extra_pos,
|
||||
));
|
||||
|
||||
crb.add(VPC_DS_CLIP_CULL_CNTL(CHIP,
|
||||
.clip_mask = clip_cull_mask,
|
||||
.clip_dist_03_loc = clip0_loc,
|
||||
.clip_dist_47_loc = clip1_loc,
|
||||
));
|
||||
|
||||
if (CHIP <= A7XX) {
|
||||
crb.add(VPC_DS_CLIP_CULL_CNTL_V2(CHIP,
|
||||
.clip_mask = clip_cull_mask,
|
||||
.clip_dist_03_loc = clip0_loc,
|
||||
.clip_dist_47_loc = clip1_loc,
|
||||
));
|
||||
}
|
||||
|
||||
crb.add(GRAS_CL_DS_CLIP_CULL_DISTANCE(CHIP,
|
||||
.clip_mask = last_shader->clip_mask,
|
||||
.cull_mask = last_shader->cull_mask,
|
||||
));
|
||||
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
regs = (uint32_t *)sp_out;
|
||||
for (unsigned i = 0; i < sp_out_count; i++)
|
||||
crb.add(A6XX_SP_GS_OUTPUT_REG(i, .dword = regs[i]));
|
||||
|
||||
regs = (uint32_t *)sp_vpc_dst;
|
||||
for (unsigned i = 0; i < sp_vpc_dst_count; i++)
|
||||
crb.add(A6XX_SP_GS_VPC_DEST_REG(i, .dword = regs[i]));
|
||||
|
||||
crb.add(VPC_GS_CNTL(CHIP,
|
||||
.stride_in_vpc = linkage.max_loc,
|
||||
.positionloc = position_loc,
|
||||
.psizeloc = pointsize_loc,
|
||||
.extrapos = extra_pos,
|
||||
));
|
||||
|
||||
crb.add(VPC_GS_CLIP_CULL_CNTL(CHIP,
|
||||
.clip_mask = clip_cull_mask,
|
||||
.clip_dist_03_loc = clip0_loc,
|
||||
.clip_dist_47_loc = clip1_loc,
|
||||
));
|
||||
|
||||
if (CHIP <= A7XX) {
|
||||
crb.add(VPC_GS_CLIP_CULL_CNTL_V2(CHIP,
|
||||
.clip_mask = clip_cull_mask,
|
||||
.clip_dist_03_loc = clip0_loc,
|
||||
.clip_dist_47_loc = clip1_loc,
|
||||
));
|
||||
}
|
||||
|
||||
crb.add(GRAS_CL_GS_CLIP_CULL_DISTANCE(CHIP,
|
||||
.clip_mask = last_shader->clip_mask,
|
||||
.cull_mask = last_shader->cull_mask,
|
||||
));
|
||||
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("bad last_shader type");
|
||||
}
|
||||
|
||||
const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs };
|
||||
|
||||
|
|
@ -1009,18 +1034,50 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
|
||||
bool primid = shader->type != MESA_SHADER_VERTEX &&
|
||||
VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID));
|
||||
bool last = shader == last_shader;
|
||||
|
||||
tu_cs_emit_pkt4(cs, reg_config[shader->type].reg_pc_xs_out_cntl, 1);
|
||||
if (shader == last_shader) {
|
||||
tu_cs_emit(cs, A6XX_PC_VS_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
|
||||
CONDREG(pointsize_regid, A6XX_PC_VS_CNTL_PSIZE) |
|
||||
CONDREG(layer_regid, A6XX_PC_VS_CNTL_LAYER) |
|
||||
CONDREG(view_regid, A6XX_PC_VS_CNTL_VIEW) |
|
||||
COND(primid, A6XX_PC_VS_CNTL_PRIMITIVE_ID) |
|
||||
A6XX_PC_VS_CNTL_CLIP_MASK(clip_cull_mask) |
|
||||
CONDREG(shading_rate_regid, A6XX_PC_VS_CNTL_SHADINGRATE));
|
||||
} else {
|
||||
tu_cs_emit(cs, COND(primid, A6XX_PC_VS_CNTL_PRIMITIVE_ID));
|
||||
|
||||
switch (shader->type) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
crb.add(PC_VS_CNTL(CHIP,
|
||||
.stride_in_vpc = COND(last, linkage.max_loc),
|
||||
.psize = COND(last, VALIDREG(pointsize_regid)),
|
||||
.layer = COND(last, VALIDREG(layer_regid)),
|
||||
.view = COND(last, VALIDREG(view_regid)),
|
||||
.primitive_id = primid,
|
||||
.clip_mask = COND(last, clip_cull_mask),
|
||||
.shadingrate = COND(last, VALIDREG(shading_rate_regid)),
|
||||
));
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
assert(!last);
|
||||
crb.add(PC_HS_CNTL(CHIP,
|
||||
.primitive_id = primid,
|
||||
));
|
||||
/* Stop here: falling through would also emit PC_DS_CNTL for the HS variant. */
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
crb.add(PC_DS_CNTL(CHIP,
|
||||
.stride_in_vpc = COND(last, linkage.max_loc),
|
||||
.psize = COND(last, VALIDREG(pointsize_regid)),
|
||||
.layer = COND(last, VALIDREG(layer_regid)),
|
||||
.view = COND(last, VALIDREG(view_regid)),
|
||||
.primitive_id = primid,
|
||||
.clip_mask = COND(last, clip_cull_mask),
|
||||
.shadingrate = COND(last, VALIDREG(shading_rate_regid)),
|
||||
));
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
crb.add(PC_GS_CNTL(CHIP,
|
||||
.stride_in_vpc = COND(last, linkage.max_loc),
|
||||
.psize = COND(last, VALIDREG(pointsize_regid)),
|
||||
.layer = COND(last, VALIDREG(layer_regid)),
|
||||
.view = COND(last, VALIDREG(view_regid)),
|
||||
.primitive_id = primid,
|
||||
.clip_mask = COND(last, clip_cull_mask),
|
||||
.shadingrate = COND(last, VALIDREG(shading_rate_regid)),
|
||||
));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1028,24 +1085,67 @@ tu6_emit_vpc(struct tu_cs *cs,
|
|||
if (gs)
|
||||
assert(flags_regid != INVALID_REG);
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
|
||||
tu_cs_emit(cs, A6XX_SP_VS_OUTPUT_CNTL_OUT(linkage.cnt) |
|
||||
A6XX_SP_GS_OUTPUT_CNTL_FLAGS_REGID(flags_regid));
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1);
|
||||
tu_cs_emit(cs, A6XX_VPC_VS_SIV_CNTL_LAYERLOC(layer_loc) |
|
||||
A6XX_VPC_VS_SIV_CNTL_VIEWLOC(view_loc) |
|
||||
A6XX_VPC_VS_SIV_CNTL_SHADINGRATELOC(shading_rate_loc));
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl_v2, 1);
|
||||
tu_cs_emit(cs, A6XX_VPC_VS_SIV_CNTL_LAYERLOC(layer_loc) |
|
||||
A6XX_VPC_VS_SIV_CNTL_VIEWLOC(view_loc) |
|
||||
A6XX_VPC_VS_SIV_CNTL_SHADINGRATELOC(shading_rate_loc));
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1);
|
||||
tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_SU_VS_SIV_CNTL_WRITES_LAYER) |
|
||||
CONDREG(view_regid, A6XX_GRAS_SU_VS_SIV_CNTL_WRITES_VIEW));
|
||||
|
||||
tu6_emit_vpc_varying_modes<CHIP>(cs, fs, last_shader);
|
||||
switch (last_shader->type) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
crb.add(A6XX_SP_VS_OUTPUT_CNTL(.out = linkage.cnt));
|
||||
crb.add(VPC_VS_SIV_CNTL(CHIP,
|
||||
.layerloc = layer_loc,
|
||||
.viewloc = view_loc,
|
||||
.shadingrateloc = shading_rate_loc,
|
||||
));
|
||||
if (CHIP <= A7XX) {
|
||||
crb.add(VPC_VS_SIV_CNTL_V2(CHIP,
|
||||
.layerloc = layer_loc,
|
||||
.viewloc = view_loc,
|
||||
.shadingrateloc = shading_rate_loc,
|
||||
));
|
||||
}
|
||||
crb.add(GRAS_SU_VS_SIV_CNTL(CHIP,
|
||||
.writes_layer = VALIDREG(layer_regid),
|
||||
.writes_view = VALIDREG(view_regid),
|
||||
));
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
crb.add(A6XX_SP_DS_OUTPUT_CNTL(.out = linkage.cnt));
|
||||
crb.add(VPC_DS_SIV_CNTL(CHIP,
|
||||
.layerloc = layer_loc,
|
||||
.viewloc = view_loc,
|
||||
.shadingrateloc = shading_rate_loc,
|
||||
));
|
||||
if (CHIP <= A7XX) {
|
||||
crb.add(VPC_DS_SIV_CNTL_V2(CHIP,
|
||||
.layerloc = layer_loc,
|
||||
.viewloc = view_loc,
|
||||
.shadingrateloc = shading_rate_loc,
|
||||
));
|
||||
}
|
||||
crb.add(GRAS_SU_DS_SIV_CNTL(CHIP,
|
||||
.writes_layer = VALIDREG(layer_regid),
|
||||
.writes_view = VALIDREG(view_regid),
|
||||
));
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
crb.add(A6XX_SP_GS_OUTPUT_CNTL(.out = linkage.cnt, .flags_regid = flags_regid));
|
||||
crb.add(VPC_GS_SIV_CNTL(CHIP,
|
||||
.layerloc = layer_loc,
|
||||
.viewloc = view_loc,
|
||||
.shadingrateloc = shading_rate_loc,
|
||||
));
|
||||
if (CHIP <= A7XX) {
|
||||
crb.add(VPC_GS_SIV_CNTL_V2(CHIP,
|
||||
.layerloc = layer_loc,
|
||||
.viewloc = view_loc,
|
||||
.shadingrateloc = shading_rate_loc,
|
||||
));
|
||||
}
|
||||
crb.add(GRAS_SU_GS_SIV_CNTL(CHIP,
|
||||
.writes_layer = VALIDREG(layer_regid),
|
||||
.writes_view = VALIDREG(view_regid),
|
||||
));
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("bad last_shader type");
|
||||
}
|
||||
}
|
||||
TU_GENX(tu6_emit_vpc);
|
||||
|
||||
|
|
@ -1159,8 +1259,7 @@ tu6_emit_patch_control_points(struct tu_cs *cs,
|
|||
patch_control_points * vs->variant->output_size / 4;
|
||||
|
||||
/* Total attribute slots in HS incoming patch. */
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_PARAM_1, 1);
|
||||
tu_cs_emit(cs, patch_local_mem_size_16b);
|
||||
tu_cs_emit_regs(cs, PC_HS_PARAM_1(CHIP, patch_local_mem_size_16b));
|
||||
|
||||
const uint32_t wavesize = 64;
|
||||
const uint32_t vs_hs_local_mem_size = 16384;
|
||||
|
|
@ -1266,11 +1365,14 @@ tu6_emit_program_config(struct tu_cs *cs,
|
|||
.ds_state = true, .gs_state = true,
|
||||
.fs_state = true, .gfx_uav = true,
|
||||
.gfx_shared_const = shared_consts_enable));
|
||||
for (size_t stage_idx = MESA_SHADER_VERTEX;
|
||||
stage_idx <= MESA_SHADER_FRAGMENT; stage_idx++) {
|
||||
mesa_shader_stage stage = (mesa_shader_stage) stage_idx;
|
||||
tu6_emit_xs_config<CHIP>(crb, stage, variants[stage]);
|
||||
}
|
||||
|
||||
const struct ir3_shader_variant *vs = variants[MESA_SHADER_VERTEX];
|
||||
const struct ir3_shader_variant *hs = variants[MESA_SHADER_TESS_CTRL];
|
||||
const struct ir3_shader_variant *ds = variants[MESA_SHADER_TESS_EVAL];
|
||||
const struct ir3_shader_variant *gs = variants[MESA_SHADER_GEOMETRY];
|
||||
const struct ir3_shader_variant *fs = variants[MESA_SHADER_FRAGMENT];
|
||||
|
||||
tu6_emit_xs_config<CHIP>(crb, { .vs = vs, .hs = hs, .ds = ds, .gs = gs, .fs = fs });
|
||||
|
||||
crb.flush();
|
||||
|
||||
|
|
@ -1280,11 +1382,6 @@ tu6_emit_program_config(struct tu_cs *cs,
|
|||
tu6_emit_dynamic_offset(cs, variants[stage], shaders[stage], prog);
|
||||
}
|
||||
|
||||
const struct ir3_shader_variant *vs = variants[MESA_SHADER_VERTEX];
|
||||
const struct ir3_shader_variant *hs = variants[MESA_SHADER_TESS_CTRL];
|
||||
const struct ir3_shader_variant *ds = variants[MESA_SHADER_TESS_EVAL];
|
||||
const struct ir3_shader_variant *gs = variants[MESA_SHADER_GEOMETRY];
|
||||
|
||||
if (hs) {
|
||||
tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER);
|
||||
tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER);
|
||||
|
|
@ -1304,8 +1401,9 @@ tu6_emit_program_config(struct tu_cs *cs,
|
|||
uint32_t vec4_size = gs->gs.vertices_in *
|
||||
DIV_ROUND_UP(prev_stage_output_size, 4);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
|
||||
tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));
|
||||
tu_cs_emit_regs(cs, PC_PRIMITIVE_CNTL_6(CHIP,
|
||||
.stride_in_vpc = vec4_size,
|
||||
));
|
||||
}
|
||||
|
||||
uint32_t prim_size = prev_stage_output_size;
|
||||
|
|
@ -2866,17 +2964,18 @@ void
|
|||
tu6_emit_sample_locations(struct tu_cs *cs, bool enable,
|
||||
const struct vk_sample_locations_state *samp_loc)
|
||||
{
|
||||
uint32_t sample_config =
|
||||
COND(enable, A6XX_RB_MSAA_SAMPLE_POS_CNTL_LOCATION_ENABLE);
|
||||
tu_cs_emit_regs(cs, GRAS_SC_MSAA_SAMPLE_POS_CNTL(CHIP,
|
||||
.location_enable = enable,
|
||||
));
|
||||
tu_cs_emit_regs(cs, A6XX_RB_MSAA_SAMPLE_POS_CNTL(
|
||||
.location_enable = enable,
|
||||
));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_MSAA_SAMPLE_POS_CNTL, 1);
|
||||
tu_cs_emit(cs, sample_config);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_SAMPLE_POS_CNTL, 1);
|
||||
tu_cs_emit(cs, sample_config);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_TPL1_MSAA_SAMPLE_POS_CNTL, 1);
|
||||
tu_cs_emit(cs, sample_config);
|
||||
if (CHIP <= A7XX) {
|
||||
tu_cs_emit_regs(cs, TPL1_MSAA_SAMPLE_POS_CNTL(CHIP,
|
||||
.location_enable = enable,
|
||||
));
|
||||
}
|
||||
|
||||
if (!enable)
|
||||
return;
|
||||
|
|
@ -2903,14 +3002,21 @@ tu6_emit_sample_locations(struct tu_cs *cs, bool enable,
|
|||
A6XX_RB_PROGRAMMABLE_MSAA_POS_0_SAMPLE_0_Y(y))) << i*8;
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_PROGRAMMABLE_MSAA_POS_0, 2);
|
||||
tu_cs_emit_qw(cs, sample_locations);
|
||||
tu_cs_emit_regs(cs,
|
||||
GRAS_SC_PROGRAMMABLE_MSAA_POS_0(CHIP, .dword = sample_locations),
|
||||
GRAS_SC_PROGRAMMABLE_MSAA_POS_1(CHIP, .dword = sample_locations >> 32),
|
||||
);
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_PROGRAMMABLE_MSAA_POS_0(.dword = sample_locations),
|
||||
A6XX_RB_PROGRAMMABLE_MSAA_POS_1(.dword = sample_locations >> 32),
|
||||
);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_PROGRAMMABLE_MSAA_POS_0, 2);
|
||||
tu_cs_emit_qw(cs, sample_locations);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_TPL1_PROGRAMMABLE_MSAA_POS_0, 2);
|
||||
tu_cs_emit_qw(cs, sample_locations);
|
||||
if (CHIP <= A7XX) {
|
||||
tu_cs_emit_regs(cs,
|
||||
TPL1_PROGRAMMABLE_MSAA_POS_0(CHIP, .dword = sample_locations),
|
||||
TPL1_PROGRAMMABLE_MSAA_POS_1(CHIP, .dword = sample_locations >> 32),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
static const enum mesa_vk_dynamic_graphics_state tu_depth_bias_state[] = {
|
||||
|
|
|
|||
|
|
@ -301,11 +301,14 @@ struct tu_pvtmem_config {
|
|||
bool per_wave;
|
||||
};
|
||||
|
||||
/* One shader-variant pointer per stage (vs/hs/ds/gs/fs/cs), passed by value
 * to tu6_emit_xs_config(). Callers visible in this change fill only the
 * stages they have via designated initializers (e.g. { .cs = v }).
 */
struct tu_shader_stages {
|
||||
const struct ir3_shader_variant *vs, *hs, *ds, *gs, *fs, *cs;
|
||||
};
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
tu6_emit_xs_config(struct tu_crb &crb,
|
||||
mesa_shader_stage stage,
|
||||
const struct ir3_shader_variant *xs);
|
||||
struct tu_shader_stages stages);
|
||||
|
||||
template <chip CHIP>
|
||||
void
|
||||
|
|
|
|||
|
|
@ -27,7 +27,19 @@
|
|||
|
||||
#define NSEC_PER_SEC 1000000000ull
|
||||
#define WAIT_TIMEOUT 5
|
||||
#define STAT_COUNT ((REG_A6XX_RBBM_PIPESTAT_CSINVOCATIONS - REG_A6XX_RBBM_PIPESTAT_IAVERTICES) / 2 + 1)
|
||||
#define __COUNTER_REG(CHIP, name) __RBBM_PIPESTAT_ ## name <CHIP>({}).reg
|
||||
#define COUNTER_REG(name) __COUNTER_REG(CHIP, name)
|
||||
|
||||
/* Note: gen8 changes the order of the pipestat regs, but in either case
|
||||
* the ones we are interested in are consecutive, so for the purposes of
|
||||
* knowing how many values to read we can just use A6XX reg addresses.
|
||||
*
|
||||
* And in both cases, RBBM_PIPESTAT_IAVERTICES is the first one.
|
||||
*
|
||||
* Depending on how/if they shuffle around in the future, we might need
|
||||
* to shift to reading them individually, like gallium does.
|
||||
*/
|
||||
#define STAT_COUNT ((__COUNTER_REG(A6XX, CSINVOCATIONS) - __COUNTER_REG(A6XX, IAVERTICES)) / 2 + 1)
|
||||
|
||||
struct PACKED query_slot {
|
||||
uint64_t available;
|
||||
|
|
@ -463,35 +475,38 @@ get_result_count(struct tu_query_pool *pool)
|
|||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static uint32_t
|
||||
statistics_index(uint32_t *statistics)
|
||||
{
|
||||
uint32_t stat;
|
||||
stat = u_bit_scan(statistics);
|
||||
|
||||
#define COUNTER_OFFSET(name) ((COUNTER_REG(name) - COUNTER_REG(IAVERTICES)) / 2)
|
||||
|
||||
switch (1 << stat) {
|
||||
case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT:
|
||||
return 0;
|
||||
return COUNTER_OFFSET(IAVERTICES);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT:
|
||||
return 1;
|
||||
return COUNTER_OFFSET(IAPRIMITIVES);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT:
|
||||
return 2;
|
||||
return COUNTER_OFFSET(VSINVOCATIONS);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT:
|
||||
return 5;
|
||||
return COUNTER_OFFSET(GSINVOCATIONS);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT:
|
||||
return 6;
|
||||
return COUNTER_OFFSET(GSPRIMITIVES);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT:
|
||||
return 7;
|
||||
return COUNTER_OFFSET(CINVOCATIONS);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT:
|
||||
return 8;
|
||||
return COUNTER_OFFSET(CPRIMITIVES);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT:
|
||||
return 9;
|
||||
return COUNTER_OFFSET(PSINVOCATIONS);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT:
|
||||
return 3;
|
||||
return COUNTER_OFFSET(HSINVOCATIONS);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT:
|
||||
return 4;
|
||||
return COUNTER_OFFSET(DSINVOCATIONS);
|
||||
case VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT:
|
||||
return 10;
|
||||
return COUNTER_OFFSET(CSINVOCATIONS);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -588,6 +603,7 @@ write_performance_query_value_cpu(char *base,
|
|||
}
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
static VkResult
|
||||
get_query_pool_results(struct tu_device *device,
|
||||
struct tu_query_pool *pool,
|
||||
|
|
@ -634,7 +650,7 @@ get_query_pool_results(struct tu_device *device,
|
|||
uint64_t *result;
|
||||
|
||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
||||
uint32_t stat_idx = statistics_index(&statistics);
|
||||
uint32_t stat_idx = statistics_index<CHIP>(&statistics);
|
||||
result = query_result_addr(pool, query, uint64_t, stat_idx);
|
||||
} else if (is_perf_query_raw(pool)) {
|
||||
result = query_result_addr(pool, query, struct perfcntr_query_slot, k);
|
||||
|
|
@ -703,6 +719,7 @@ get_query_pool_results(struct tu_device *device,
|
|||
return result;
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
tu_GetQueryPoolResults(VkDevice _device,
|
||||
VkQueryPool queryPool,
|
||||
|
|
@ -731,13 +748,14 @@ tu_GetQueryPoolResults(VkDevice _device,
|
|||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||
return get_query_pool_results(device, pool, firstQuery, queryCount,
|
||||
return get_query_pool_results<CHIP>(device, pool, firstQuery, queryCount,
|
||||
dataSize, pData, stride, flags);
|
||||
default:
|
||||
assert(!"Invalid query type");
|
||||
}
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
TU_GENX(tu_GetQueryPoolResults);
|
||||
|
||||
/* Copies a query value from one buffer to another from the GPU. */
|
||||
static void
|
||||
|
|
@ -808,7 +826,7 @@ emit_copy_query_pool_results(struct tu_cmd_buffer *cmdbuf,
|
|||
uint64_t result_iova;
|
||||
|
||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
||||
uint32_t stat_idx = statistics_index(&statistics);
|
||||
uint32_t stat_idx = statistics_index<CHIP>(&statistics);
|
||||
result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
|
||||
} else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
|
||||
result_iova = query_result_iova(pool, query,
|
||||
|
|
@ -895,6 +913,7 @@ tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
TU_GENX(tu_CmdCopyQueryPoolResults);
|
||||
|
||||
template <chip CHIP>
|
||||
static void
|
||||
emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
|
||||
struct tu_query_pool *pool,
|
||||
|
|
@ -915,7 +934,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
|
|||
uint64_t result_iova;
|
||||
|
||||
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
|
||||
uint32_t stat_idx = statistics_index(&statistics);
|
||||
uint32_t stat_idx = statistics_index<CHIP>(&statistics);
|
||||
result_iova = query_result_iova(pool, query, uint64_t, stat_idx);
|
||||
} else if (is_perf_query_raw(pool)) {
|
||||
result_iova = query_result_iova(pool, query,
|
||||
|
|
@ -949,6 +968,7 @@ emit_reset_query_pool(struct tu_cmd_buffer *cmdbuf,
|
|||
|
||||
}
|
||||
|
||||
template <chip CHIP>
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
||||
VkQueryPool queryPool,
|
||||
|
|
@ -969,12 +989,13 @@ tu_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
|||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||
emit_reset_query_pool(cmdbuf, pool, firstQuery, queryCount);
|
||||
emit_reset_query_pool<CHIP>(cmdbuf, pool, firstQuery, queryCount);
|
||||
break;
|
||||
default:
|
||||
assert(!"Invalid query type");
|
||||
}
|
||||
}
|
||||
TU_GENX(tu_CmdResetQueryPool);
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_ResetQueryPool(VkDevice device,
|
||||
|
|
@ -1147,7 +1168,7 @@ emit_begin_stat_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) |
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) |
|
||||
CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
|
||||
CP_REG_TO_MEM_0_64B);
|
||||
tu_cs_emit_qw(cs, begin_iova);
|
||||
|
|
@ -1365,7 +1386,7 @@ emit_begin_prim_generated_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_CINVOCATIONS) |
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(CINVOCATIONS)) |
|
||||
CP_REG_TO_MEM_0_CNT(2) |
|
||||
CP_REG_TO_MEM_0_64B);
|
||||
tu_cs_emit_qw(cs, begin_iova);
|
||||
|
|
@ -1633,7 +1654,7 @@ emit_end_stat_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_IAVERTICES) |
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(IAVERTICES)) |
|
||||
CP_REG_TO_MEM_0_CNT(STAT_COUNT * 2) |
|
||||
CP_REG_TO_MEM_0_64B);
|
||||
tu_cs_emit_qw(cs, end_iova);
|
||||
|
|
@ -1918,7 +1939,7 @@ emit_end_prim_generated_query(struct tu_cmd_buffer *cmdbuf,
|
|||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_RBBM_PIPESTAT_CINVOCATIONS) |
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(COUNTER_REG(CINVOCATIONS)) |
|
||||
CP_REG_TO_MEM_0_CNT(2) |
|
||||
CP_REG_TO_MEM_0_64B);
|
||||
tu_cs_emit_qw(cs, end_iova);
|
||||
|
|
@ -2031,6 +2052,7 @@ tu_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
TU_GENX(tu_CmdEndQueryIndexedEXT);
|
||||
|
||||
template <chip CHIP>
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
|
||||
VkPipelineStageFlagBits2 pipelineStage,
|
||||
|
|
@ -2067,7 +2089,7 @@ tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(REG_A6XX_CP_ALWAYS_ON_COUNTER) |
|
||||
tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(__CP_ALWAYS_ON_COUNTER<CHIP>({}).reg) |
|
||||
CP_REG_TO_MEM_0_CNT(2) |
|
||||
CP_REG_TO_MEM_0_64B);
|
||||
tu_cs_emit_qw(cs, query_result_iova(pool, query, uint64_t, 0));
|
||||
|
|
@ -2108,6 +2130,7 @@ tu_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
|
|||
*/
|
||||
handle_multiview_queries(cmd, pool, query);
|
||||
}
|
||||
TU_GENX(tu_CmdWriteTimestamp2);
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_CmdWriteAccelerationStructuresPropertiesKHR(VkCommandBuffer commandBuffer,
|
||||
|
|
|
|||
|
|
@ -1781,7 +1781,7 @@ tu6_emit_cs_config(struct tu_cs *cs,
|
|||
|
||||
crb.add(SP_UPDATE_CNTL(CHIP, .cs_state = true, .cs_uav = true,
|
||||
.cs_shared_const = shared_consts_enable));
|
||||
tu6_emit_xs_config<CHIP>(crb, MESA_SHADER_COMPUTE, v);
|
||||
tu6_emit_xs_config<CHIP>(crb, { .cs = v });
|
||||
tu6_emit_xs(crb, cs->device, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
|
||||
}
|
||||
tu6_emit_xs_constants(cs, MESA_SHADER_COMPUTE, v, binary_iova);
|
||||
|
|
@ -2031,50 +2031,54 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
|
|||
need_size = true;
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_INTERP_CNTL, 1);
|
||||
tu_cs_emit(cs,
|
||||
CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_GRAS_CL_INTERP_CNTL_IJ_PERSP_PIXEL) |
|
||||
CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_GRAS_CL_INTERP_CNTL_IJ_PERSP_CENTROID) |
|
||||
CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_GRAS_CL_INTERP_CNTL_IJ_PERSP_SAMPLE) |
|
||||
CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_PIXEL) |
|
||||
CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_CENTROID) |
|
||||
CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_SAMPLE) |
|
||||
COND(need_size, A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_PIXEL) |
|
||||
COND(need_size_persamp, A6XX_GRAS_CL_INTERP_CNTL_IJ_LINEAR_SAMPLE) |
|
||||
COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CL_INTERP_CNTL_COORD_MASK(fs->fragcoord_compmask)));
|
||||
tu_cs_emit_regs(cs,
|
||||
GRAS_CL_INTERP_CNTL(CHIP,
|
||||
.ij_persp_pixel = VALIDREG(ij_regid[IJ_PERSP_PIXEL]),
|
||||
.ij_persp_centroid = VALIDREG(ij_regid[IJ_PERSP_CENTROID]),
|
||||
.ij_persp_sample = VALIDREG(ij_regid[IJ_PERSP_SAMPLE]),
|
||||
.ij_linear_pixel = VALIDREG(ij_regid[IJ_LINEAR_PIXEL]) || need_size,
|
||||
.ij_linear_centroid = VALIDREG(ij_regid[IJ_LINEAR_CENTROID]),
|
||||
.ij_linear_sample = VALIDREG(ij_regid[IJ_LINEAR_SAMPLE]) || need_size_persamp,
|
||||
.coord_mask = fs->fragcoord_compmask,
|
||||
)
|
||||
);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_INTERP_CNTL, 2);
|
||||
tu_cs_emit(cs,
|
||||
CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_RB_INTERP_CNTL_IJ_PERSP_PIXEL) |
|
||||
CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_RB_INTERP_CNTL_IJ_PERSP_CENTROID) |
|
||||
CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_RB_INTERP_CNTL_IJ_PERSP_SAMPLE) |
|
||||
CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_RB_INTERP_CNTL_IJ_LINEAR_PIXEL) |
|
||||
CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_RB_INTERP_CNTL_IJ_LINEAR_CENTROID) |
|
||||
CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_RB_INTERP_CNTL_IJ_LINEAR_SAMPLE) |
|
||||
COND(need_size, A6XX_RB_INTERP_CNTL_IJ_LINEAR_PIXEL) |
|
||||
COND(enable_varyings, A6XX_RB_INTERP_CNTL_INTERP_EN) |
|
||||
COND(need_size_persamp, A6XX_RB_INTERP_CNTL_IJ_LINEAR_SAMPLE) |
|
||||
COND(fs->fragcoord_compmask != 0,
|
||||
A6XX_RB_INTERP_CNTL_COORD_MASK(fs->fragcoord_compmask)));
|
||||
tu_cs_emit(cs,
|
||||
A6XX_RB_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE(
|
||||
sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER) |
|
||||
CONDREG(smask_in_regid, A6XX_RB_PS_INPUT_CNTL_SAMPLEMASK) |
|
||||
CONDREG(samp_id_regid, A6XX_RB_PS_INPUT_CNTL_SAMPLEID) |
|
||||
CONDREG(ij_regid[IJ_PERSP_CENTER_RHW], A6XX_RB_PS_INPUT_CNTL_CENTERRHW) |
|
||||
COND(fs->frag_face, A6XX_RB_PS_INPUT_CNTL_FACENESS) |
|
||||
CONDREG(shading_rate_regid, A6XX_RB_PS_INPUT_CNTL_FOVEATION));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_INTERP_CNTL(
|
||||
.ij_persp_pixel = VALIDREG(ij_regid[IJ_PERSP_PIXEL]),
|
||||
.ij_persp_centroid = VALIDREG(ij_regid[IJ_PERSP_CENTROID]),
|
||||
.ij_persp_sample = VALIDREG(ij_regid[IJ_PERSP_SAMPLE]),
|
||||
.ij_linear_pixel = VALIDREG(ij_regid[IJ_LINEAR_PIXEL]) || need_size,
|
||||
.ij_linear_centroid = VALIDREG(ij_regid[IJ_LINEAR_CENTROID]),
|
||||
.ij_linear_sample = VALIDREG(ij_regid[IJ_LINEAR_SAMPLE]) || need_size_persamp,
|
||||
.coord_mask = fs->fragcoord_compmask,
|
||||
.interp_en = enable_varyings,
|
||||
),
|
||||
A6XX_RB_PS_INPUT_CNTL(
|
||||
.samplemask = VALIDREG(smask_in_regid),
|
||||
.postdepthcoverage = fs->post_depth_coverage,
|
||||
.faceness = fs->frag_face,
|
||||
.sampleid = VALIDREG(samp_id_regid),
|
||||
.fragcoordsamplemode = sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER,
|
||||
.centerrhw = VALIDREG(ij_regid[IJ_PERSP_CENTER_RHW]),
|
||||
.foveation = VALIDREG(shading_rate_regid),
|
||||
),
|
||||
);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_PS_SAMPLEFREQ_CNTL, 1);
|
||||
tu_cs_emit(cs, COND(sample_shading, A6XX_RB_PS_SAMPLEFREQ_CNTL_PER_SAMP_MODE));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_PS_SAMPLEFREQ_CNTL(sample_shading)
|
||||
);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_INPUT_CNTL, 1);
|
||||
tu_cs_emit(cs, CONDREG(samp_id_regid, A6XX_GRAS_LRZ_PS_INPUT_CNTL_SAMPLEID) |
|
||||
A6XX_GRAS_LRZ_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE(
|
||||
sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER));
|
||||
tu_cs_emit_regs(cs,
|
||||
GRAS_LRZ_PS_INPUT_CNTL(CHIP,
|
||||
.sampleid = VALIDREG(samp_id_regid),
|
||||
.fragcoordsamplemode = sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER,
|
||||
)
|
||||
);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL, 1);
|
||||
tu_cs_emit(cs, COND(sample_shading, A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL_PER_SAMP_MODE));
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_GRAS_LRZ_PS_SAMPLEFREQ_CNTL(sample_shading)
|
||||
);
|
||||
|
||||
uint32_t varmask[4] = { 0 };
|
||||
|
||||
|
|
@ -2200,11 +2204,11 @@ tu6_emit_vs(struct tu_cs *cs,
|
|||
bool multi_pos_output = vs->multi_pos_output;
|
||||
|
||||
uint32_t multiview_views = util_logbase2(view_mask) + 1;
|
||||
uint32_t multiview_cntl = view_mask ?
|
||||
A6XX_PC_STEREO_RENDERING_CNTL_ENABLE |
|
||||
A6XX_PC_STEREO_RENDERING_CNTL_VIEWS(multiview_views) |
|
||||
COND(!multi_pos_output, A6XX_PC_STEREO_RENDERING_CNTL_DISABLEMULTIPOS)
|
||||
: 0;
|
||||
struct fd_reg_pair multiview_cntl = PC_STEREO_RENDERING_CNTL(CHIP,
|
||||
.enable = view_mask,
|
||||
.disablemultipos = !multi_pos_output,
|
||||
.views = multiview_views,
|
||||
);
|
||||
|
||||
/* Copy what the blob does here. This will emit an extra 0x3f
|
||||
* CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what
|
||||
|
|
@ -2213,27 +2217,31 @@ tu6_emit_vs(struct tu_cs *cs,
|
|||
if (cs->device->physical_device->info->props.has_cp_reg_write) {
|
||||
tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
|
||||
tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE));
|
||||
tu_cs_emit(cs, REG_A6XX_PC_STEREO_RENDERING_CNTL);
|
||||
tu_cs_emit(cs, multiview_cntl.reg);
|
||||
} else {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_PC_STEREO_RENDERING_CNTL, 1);
|
||||
tu_cs_emit_pkt4(cs, multiview_cntl.reg, 1);
|
||||
}
|
||||
tu_cs_emit(cs, multiview_cntl);
|
||||
tu_cs_emit(cs, multiview_cntl.value);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_VFD_STEREO_RENDERING_CNTL, 1);
|
||||
tu_cs_emit(cs, multiview_cntl);
|
||||
tu_cs_emit_regs(cs, A6XX_VFD_STEREO_RENDERING_CNTL(
|
||||
.enable = view_mask,
|
||||
.disablemultipos = !multi_pos_output,
|
||||
.views = multiview_views,
|
||||
));
|
||||
|
||||
if (multiview_cntl &&
|
||||
if (view_mask &&
|
||||
cs->device->physical_device->info->props.supports_multiview_mask) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_PC_STEREO_RENDERING_VIEWMASK, 1);
|
||||
tu_cs_emit(cs, view_mask);
|
||||
tu_cs_emit_regs(cs, PC_STEREO_RENDERING_VIEWMASK(CHIP, view_mask));
|
||||
}
|
||||
|
||||
if (CHIP >= A7XX) {
|
||||
tu_cs_emit_pkt4(cs, REG_A7XX_VPC_STEREO_RENDERING_CNTL, 1);
|
||||
tu_cs_emit(cs, multiview_cntl);
|
||||
tu_cs_emit_regs(cs, VPC_STEREO_RENDERING_CNTL(CHIP,
|
||||
.enable = view_mask,
|
||||
.disablemultipos = !multi_pos_output,
|
||||
.views = multiview_views,
|
||||
));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A7XX_VPC_STEREO_RENDERING_VIEWMASK, 1);
|
||||
tu_cs_emit(cs, view_mask);
|
||||
tu_cs_emit_regs(cs, VPC_STEREO_RENDERING_VIEWMASK(CHIP, view_mask));
|
||||
}
|
||||
|
||||
tu6_emit_vfd_dest(cs, vs);
|
||||
|
|
@ -2276,8 +2284,7 @@ tu6_emit_hs(struct tu_cs *cs,
|
|||
A6XX_VFD_CNTL_2_REGID_INVOCATIONID(hs_invocation_regid));
|
||||
|
||||
if (hs) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_PARAM_0, 1);
|
||||
tu_cs_emit(cs, hs->tess.tcs_vertices_out);
|
||||
tu_cs_emit_regs(cs, PC_HS_PARAM_0(CHIP, hs->tess.tcs_vertices_out));
|
||||
}
|
||||
}
|
||||
TU_GENX(tu6_emit_hs);
|
||||
|
|
@ -2524,7 +2531,7 @@ tu_upload_shader(struct tu_device *dev,
|
|||
size += TU6_EMIT_VFD_DEST_MAX_DWORDS;
|
||||
|
||||
const unsigned xs_size = 128;
|
||||
const unsigned vpc_size = 32 + (v->stream_output.num_outputs != 0 ? 256 : 0);
|
||||
const unsigned vpc_size = 64 + (v->stream_output.num_outputs != 0 ? 256 : 0);
|
||||
|
||||
for (auto& variant : {v, binning, safe_const, safe_const_binning}) {
|
||||
if (variant) {
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "freedreno_batch.h"
|
||||
|
||||
#include "fd6_barrier.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_blend.h"
|
||||
#include "util/u_dual_blend.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "util/format_srgb.h"
|
||||
#include "util/half_float.h"
|
||||
#include "util/u_dump.h"
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
*/
|
||||
|
||||
#include "drm/freedreno_ringbuffer.h"
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_dump.h"
|
||||
|
|
|
|||
|
|
@ -4,8 +4,6 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "fd6_barrier.h"
|
||||
#include "fd6_const.h"
|
||||
#include "fd6_compute.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "freedreno_query_acc.h"
|
||||
#include "freedreno_state.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_prim.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/u_helpers.h"
|
||||
|
|
|
|||
|
|
@ -220,13 +220,17 @@ __event_write(fd_cs &cs, enum fd_gpu_event event,
|
|||
fd_pkt7 pkt(cs, CP_EVENT_WRITE, len);
|
||||
|
||||
if (CHIP == A6XX) {
|
||||
pkt.add(CP_EVENT_WRITE_0_EVENT(info.raw_event) |
|
||||
COND(info.needs_seqno, CP_EVENT_WRITE_0_TIMESTAMP));
|
||||
pkt.add(CP_EVENT_WRITE_0(
|
||||
.event = info.raw_event,
|
||||
.timestamp = info.needs_seqno,
|
||||
));
|
||||
} else if (CHIP >= A7XX) {
|
||||
pkt.add(CP_EVENT_WRITE7_0_EVENT(info.raw_event) |
|
||||
CP_EVENT_WRITE7_0_WRITE_SRC(esrc) |
|
||||
CP_EVENT_WRITE7_0_WRITE_DST(edst) |
|
||||
COND(info.needs_seqno, CP_EVENT_WRITE7_0_WRITE_ENABLED));
|
||||
pkt.add(CP_EVENT_WRITE7_0(
|
||||
.event = info.raw_event,
|
||||
.write_src = esrc,
|
||||
.write_dst = edst,
|
||||
.write_enabled = info.needs_seqno,
|
||||
));
|
||||
}
|
||||
|
||||
if (info.needs_seqno) {
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
#include "freedreno_resource.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include <initializer_list>
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
|
|
@ -884,9 +882,6 @@ emit_vpc(fd_crb &crb, const struct program_builder *b)
|
|||
}
|
||||
}
|
||||
|
||||
/* if vertex_flags somehow gets optimized out, you're gonna have a bad time: */
|
||||
assert(flags_regid != INVALID_REG);
|
||||
|
||||
switch (last_shader->type) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
crb.add(A6XX_SP_VS_OUTPUT_CNTL(.out = linkage.cnt));
|
||||
|
|
@ -927,6 +922,9 @@ emit_vpc(fd_crb &crb, const struct program_builder *b)
|
|||
));
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
/* if vertex_flags somehow gets optimized out, you're gonna have a bad time: */
|
||||
assert(flags_regid != INVALID_REG);
|
||||
|
||||
crb.add(A6XX_SP_GS_OUTPUT_CNTL(.out = linkage.cnt, .flags_regid = flags_regid));
|
||||
crb.add(VPC_GS_SIV_CNTL(CHIP,
|
||||
.layerloc = layer_loc,
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
/* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */
|
||||
|
||||
#include "freedreno_query_acc.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_string.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
|
||||
#include "a6xx/fd6_blitter.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/format/u_format.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/hash_table.h"
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
#include "freedreno_batch.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#define FD_BO_NO_HARDPIN 1
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_string.h"
|
||||
|
|
|
|||
|
|
@ -235,6 +235,7 @@ freedreno_c_args += cc.get_supported_arguments([
|
|||
|
||||
freedreno_cpp_args = []
|
||||
freedreno_cpp_args += cpp.get_supported_arguments([
|
||||
'-DFD_BO_NO_HARDPIN=1',
|
||||
'-fno-exceptions',
|
||||
'-fno-rtti',
|
||||
'-Wno-address-of-packed-member',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue