freedreno: gen8 support

Enable gen8 support.  Sysmem, gmem, and binning work.  DEQP gles2/3/31
tests are passing.

LRZ is not supported yet, and will follow later.

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38450>
This commit is contained in:
Rob Clark 2025-10-27 12:37:54 -07:00 committed by Marge Bot
parent 469a19f66b
commit 11364f4ee2
18 changed files with 455 additions and 117 deletions

View file

@ -31,6 +31,9 @@ fd6_emit_flushes(struct fd_context *ctx, fd_cs &cs, unsigned flushes)
if (flushes & FD6_INVALIDATE_CCU_DEPTH) if (flushes & FD6_INVALIDATE_CCU_DEPTH)
fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_DEPTH); fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_DEPTH);
if ((CHIP >= A7XX) && (flushes & FD6_INVALIDATE_CCHE))
fd_pkt7(cs, CP_CCHE_INVALIDATE, 0);
if (flushes & FD6_FLUSH_CACHE) if (flushes & FD6_FLUSH_CACHE)
fd6_event_write<CHIP>(ctx, cs, FD_CACHE_CLEAN); fd6_event_write<CHIP>(ctx, cs, FD_CACHE_CLEAN);

View file

@ -20,9 +20,11 @@ enum fd6_flush {
FD6_INVALIDATE_CCU_DEPTH = BIT(3), FD6_INVALIDATE_CCU_DEPTH = BIT(3),
FD6_FLUSH_CACHE = BIT(4), FD6_FLUSH_CACHE = BIT(4),
FD6_INVALIDATE_CACHE = BIT(5), FD6_INVALIDATE_CACHE = BIT(5),
FD6_WAIT_MEM_WRITES = BIT(6), FD6_INVALIDATE_CCHE = BIT(6),
FD6_WAIT_FOR_IDLE = BIT(7),
FD6_WAIT_FOR_ME = BIT(8), FD6_WAIT_MEM_WRITES = BIT(16),
FD6_WAIT_FOR_IDLE = BIT(17),
FD6_WAIT_FOR_ME = BIT(18),
}; };
template <chip CHIP> template <chip CHIP>

View file

@ -60,7 +60,7 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend,
if (!so) if (!so)
return NULL; return NULL;
unsigned nregs = (2 * A6XX_MAX_RENDER_TARGETS) + 3; unsigned nregs = (3 * A6XX_MAX_RENDER_TARGETS) + 3;
fd_crb crb(blend->ctx->pipe, nregs); fd_crb crb(blend->ctx->pipe, nregs);
for (unsigned i = 0; i <= cso->max_rt; i++) { for (unsigned i = 0; i <= cso->max_rt; i++) {
@ -87,6 +87,14 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend,
.component_enable = rt->colormask, .component_enable = rt->colormask,
)); ));
if (CHIP == A8XX) {
crb.add(SP_MRT_BLEND_CNTL_REG(CHIP, i,
.color_blend_en = rt->blend_enable,
.alpha_blend_en = rt->blend_enable,
.component_write_mask = rt->colormask,
));
}
if (rt->blend_enable) { if (rt->blend_enable) {
mrt_blend |= (1 << i); mrt_blend |= (1 << i);
} }
@ -114,6 +122,7 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend,
.independent_blend_en = cso->independent_blend_enable, .independent_blend_en = cso->independent_blend_enable,
.dual_color_in_enable = blend->use_dual_src_blend, .dual_color_in_enable = blend->use_dual_src_blend,
.alpha_to_coverage = cso->alpha_to_coverage, .alpha_to_coverage = cso->alpha_to_coverage,
.alpha_to_one = cso->alpha_to_one,
)) ))
.add(A6XX_RB_BLEND_CNTL( .add(A6XX_RB_BLEND_CNTL(
.blend_reads_dest = mrt_blend, .blend_reads_dest = mrt_blend,

View file

@ -38,7 +38,7 @@ cs_program_emit_local_size(struct fd_context *ctx, fd_crb &crb,
enum a6xx_threadsize thrsz_cs = ctx->screen->info->props enum a6xx_threadsize thrsz_cs = ctx->screen->info->props
.supports_double_threadsize ? thrsz : THREAD128; .supports_double_threadsize ? thrsz : THREAD128;
if (CHIP == A7XX) { if (CHIP >= A7XX) {
unsigned tile_height = (local_size[1] % 8 == 0) ? 3 unsigned tile_height = (local_size[1] % 8 == 0) ? 3
: (local_size[1] % 4 == 0) ? 5 : (local_size[1] % 4 == 0) ? 5
: (local_size[1] % 2 == 0) ? 9 : (local_size[1] % 2 == 0) ? 9

View file

@ -313,8 +313,10 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv,
fd_crb crb(fd6_ctx->base.pipe, 3); fd_crb crb(fd6_ctx->base.pipe, 3);
crb.add(GRAS_SC_MSAA_SAMPLE_POS_CNTL(CHIP)) crb.add(GRAS_SC_MSAA_SAMPLE_POS_CNTL(CHIP))
.add(A6XX_RB_MSAA_SAMPLE_POS_CNTL()) .add(A6XX_RB_MSAA_SAMPLE_POS_CNTL());
.add(TPL1_MSAA_SAMPLE_POS_CNTL(CHIP));
if (CHIP < A8XX)
crb.add(TPL1_MSAA_SAMPLE_POS_CNTL(CHIP));
fd6_ctx->sample_locations_disable_stateobj = crb; fd6_ctx->sample_locations_disable_stateobj = crb;

View file

@ -435,6 +435,10 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
/* convert from # of patches to draw count */ /* convert from # of patches to draw count */
subdraw_size *= ctx->patch_vertices; subdraw_size *= ctx->patch_vertices;
/* For gen8 tess_bo is sized for two draws, adjust subdraw size accordingly: */
if (CHIP >= A8XX)
subdraw_size /= 2;
fd_pkt7(cs, CP_SET_SUBDRAW_SIZE, 1) fd_pkt7(cs, CP_SET_SUBDRAW_SIZE, 1)
.add(subdraw_size); .add(subdraw_size);

View file

@ -21,6 +21,7 @@
#include "freedreno_state.h" #include "freedreno_state.h"
#include "freedreno_stompable_regs.h" #include "freedreno_stompable_regs.h"
#include "freedreno_tracepoints.h" #include "freedreno_tracepoints.h"
#include "freedreno_vrs.h"
#include "fd6_blend.h" #include "fd6_blend.h"
#include "fd6_const.h" #include "fd6_const.h"
@ -464,7 +465,7 @@ fd6_emit_non_group(fd_cs &cs, struct fd6_emit *emit) assert_dt
const enum fd_dirty_3d_state dirty = ctx->dirty; const enum fd_dirty_3d_state dirty = ctx->dirty;
unsigned num_viewports = emit->prog->num_viewports; unsigned num_viewports = emit->prog->num_viewports;
fd_crb crb(cs, 324); fd_crb crb(cs, 2 + (12 * num_viewports));
if (dirty & FD_DIRTY_STENCIL_REF) { if (dirty & FD_DIRTY_STENCIL_REF) {
struct pipe_stencil_ref *sr = &ctx->stencil_ref; struct pipe_stencil_ref *sr = &ctx->stencil_ref;
@ -506,8 +507,11 @@ fd6_emit_non_group(fd_cs &cs, struct fd6_emit *emit) assert_dt
crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MIN(CHIP, i, zmin)); crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MIN(CHIP, i, zmin));
crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MAX(CHIP, i, zmax)); crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MAX(CHIP, i, zmax));
/* TODO: what to do about this and multi viewport ? */ if (CHIP >= A8XX) {
if (i == 0) { crb.add(RB_VIEWPORT_ZCLAMP_MIN_REG(CHIP, i, zmin));
crb.add(RB_VIEWPORT_ZCLAMP_MAX_REG(CHIP, i, zmax));
} else if (i == 0) {
/* TODO: what to do about this and multi viewport ? */
crb.add(RB_VIEWPORT_ZCLAMP_MIN(CHIP, zmin)); crb.add(RB_VIEWPORT_ZCLAMP_MIN(CHIP, zmin));
crb.add(RB_VIEWPORT_ZCLAMP_MAX(CHIP, zmax)); crb.add(RB_VIEWPORT_ZCLAMP_MAX(CHIP, zmax));
} }
@ -764,7 +768,24 @@ fd6_emit_gmem_cache_cntl(fd_cs &cs, struct fd_screen *screen, bool gmem)
uint32_t depth_offset = cfg->depth_ccu_offset & 0x1fffff; uint32_t depth_offset = cfg->depth_ccu_offset & 0x1fffff;
uint32_t depth_offset_hi = cfg->depth_ccu_offset >> 21; uint32_t depth_offset_hi = cfg->depth_ccu_offset >> 21;
if (CHIP == A7XX) { if (CHIP == A8XX) {
fd_crb(cs, 10)
.add(RB_CCU_CACHE_CNTL(CHIP,
.depth_cache_size = (enum a6xx_ccu_cache_size)cfg->depth_cache_fraction,
.depth_offset = cfg->depth_ccu_offset,
.color_cache_size = (enum a6xx_ccu_cache_size)cfg->color_cache_fraction,
.color_offset = cfg->color_ccu_offset,
))
.add(VPC_ATTR_BUF_GMEM_SIZE(CHIP, cfg->vpc_attr_buf_size))
.add(VPC_ATTR_BUF_GMEM_BASE(CHIP, cfg->vpc_attr_buf_offset))
.add(PC_ATTR_BUF_GMEM_SIZE(CHIP, cfg->vpc_attr_buf_size))
.add(VPC_POS_BUF_GMEM_SIZE(CHIP, cfg->vpc_pos_buf_size))
.add(VPC_POS_BUF_GMEM_BASE(CHIP, cfg->vpc_pos_buf_offset))
.add(PC_POS_BUF_GMEM_SIZE(CHIP, cfg->vpc_pos_buf_size))
.add(VPC_BV_POS_BUF_GMEM_BASE(CHIP, cfg->vpc_bv_pos_buf_offset))
.add(VPC_BV_POS_BUF_GMEM_SIZE(CHIP, cfg->vpc_bv_pos_buf_size))
.add(PC_BV_POS_BUF_GMEM_SIZE(CHIP, cfg->vpc_bv_pos_buf_size));
} else if (CHIP == A7XX) {
fd_pkt4(cs, 1) fd_pkt4(cs, 1)
.add(RB_CCU_CACHE_CNTL(CHIP, .add(RB_CCU_CACHE_CNTL(CHIP,
.depth_offset_hi = depth_offset_hi, .depth_offset_hi = depth_offset_hi,
@ -821,7 +842,7 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
fd_ncrb<CHIP> ncrb(cs, 28 + ARRAY_SIZE(screen->info->magic_raw)); fd_ncrb<CHIP> ncrb(cs, 28 + ARRAY_SIZE(screen->info->magic_raw));
if (CHIP >= A7XX) { if (CHIP == A7XX) {
/* On A7XX, RB_CCU_CNTL was broken into two registers, RB_CCU_CNTL which has /* On A7XX, RB_CCU_CNTL was broken into two registers, RB_CCU_CNTL which has
* static properties that can be set once, this requires a WFI to take effect. * static properties that can be set once, this requires a WFI to take effect.
* While the newly introduced register RB_CCU_CACHE_CNTL has properties that may * While the newly introduced register RB_CCU_CACHE_CNTL has properties that may
@ -858,12 +879,19 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
ncrb.add({ .reg = magic_reg.reg, .value = value }); ncrb.add({ .reg = magic_reg.reg, .value = value });
} }
ncrb.add(A6XX_RB_DBG_ECO_CNTL(.dword = screen->info->magic.RB_DBG_ECO_CNTL)); /* gen8 moves magic reg setup to KMD and blocks access from UMD:
ncrb.add(A6XX_SP_NC_MODE_CNTL_2(.f16_no_inf = true)); */
if (CHIP < A8XX) {
ncrb.add(A6XX_RB_DBG_ECO_CNTL(.dword = screen->info->magic.RB_DBG_ECO_CNTL));
ncrb.add(A6XX_SP_NC_MODE_CNTL_2(.f16_no_inf = true));
ncrb.add(VPC_LB_MODE_CNTL(CHIP));
ncrb.add(PC_CONTEXT_SWITCH_GFX_PREEMPTION_MODE(CHIP));
}
ncrb.add(A6XX_SP_PERFCTR_SHADER_MASK(.dword = 0x3f)); ncrb.add(A6XX_SP_PERFCTR_SHADER_MASK(.dword = 0x3f));
if (CHIP == A6XX && !screen->info->props.is_a702) if (CHIP == A6XX && !screen->info->props.is_a702)
ncrb.add(TPL1_UNKNOWN_B605(CHIP, .dword = 0x44)); ncrb.add(TPL1_UNKNOWN_B605(CHIP, .dword = 0x44));
if (CHIP == A6XX) { if (CHIP == A6XX) {
ncrb.add(HLSQ_UNKNOWN_BE00(CHIP, .dword = 0x80)); ncrb.add(HLSQ_UNKNOWN_BE00(CHIP, .dword = 0x80));
ncrb.add(HLSQ_UNKNOWN_BE01(CHIP)); ncrb.add(HLSQ_UNKNOWN_BE01(CHIP));
@ -875,10 +903,9 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
} }
ncrb.add(GRAS_SC_SCREEN_SCISSOR_CNTL(CHIP)); ncrb.add(GRAS_SC_SCREEN_SCISSOR_CNTL(CHIP));
ncrb.add(VPC_LB_MODE_CNTL(CHIP));
/* These regs are blocked (CP_PROTECT) on a6xx: */ /* These regs are blocked (CP_PROTECT) on a6xx, written by KMD on a8xx: */
if (CHIP >= A7XX) { if (CHIP == A7XX) {
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 0, 0)); ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 0, 0));
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 1, 0x3fe05ff4)); ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 1, 0x3fe05ff4));
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 2, 0x3fa0ebee)); ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 2, 0x3fa0ebee));
@ -891,8 +918,6 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs)
ncrb.add(RB_BIN_FOVEAT(CHIP)); ncrb.add(RB_BIN_FOVEAT(CHIP));
} }
ncrb.add(PC_CONTEXT_SWITCH_GFX_PREEMPTION_MODE(CHIP));
if (CHIP == A7XX) if (CHIP == A7XX)
ncrb.add(RB_UNKNOWN_8E09(CHIP, 0x7)); ncrb.add(RB_UNKNOWN_8E09(CHIP, 0x7));
} }
@ -913,7 +938,11 @@ fd6_emit_static_context_regs(struct fd_context *ctx, fd_cs &cs)
crb.add(SP_GFX_USIZE(CHIP)); crb.add(SP_GFX_USIZE(CHIP));
crb.add(A6XX_TPL1_PS_ROTATION_CNTL()); crb.add(A6XX_TPL1_PS_ROTATION_CNTL());
crb.add(A6XX_RB_RBP_CNTL(.dword = screen->info->magic.RB_RBP_CNTL)); /* gen8 moves magic reg programming to KMD and blocks access for UMD: */
if (CHIP < A8XX) {
crb.add(A6XX_RB_RBP_CNTL(.dword = screen->info->magic.RB_RBP_CNTL));
}
crb.add(A6XX_SP_UNKNOWN_A9A8()); crb.add(A6XX_SP_UNKNOWN_A9A8());
crb.add(A6XX_SP_MODE_CNTL( crb.add(A6XX_SP_MODE_CNTL(
@ -1010,6 +1039,12 @@ fd6_emit_static_context_regs(struct fd_context *ctx, fd_cs &cs)
* so we play safe and don't add it. * so we play safe and don't add it.
*/ */
crb.add(PC_TF_BUFFER_SIZE(CHIP, FD6_TESS<CHIP>::FACTOR_SIZE)); crb.add(PC_TF_BUFFER_SIZE(CHIP, FD6_TESS<CHIP>::FACTOR_SIZE));
if (screen->info->props.has_attachment_shading_rate) {
for (int i = 0; i < 2; i++) {
crb.add(GRAS_LRZ_QUALITY_LOOKUP_TABLE_REG(CHIP, i, fd_gras_shading_rate_lut(i)));
}
}
} }
/* There is an optimization to skip executing draw states for draws with no /* There is an optimization to skip executing draw states for draws with no
@ -1076,9 +1111,17 @@ fd6_emit_restore(fd_cs &cs, struct fd_batch *batch)
.concurrent_bin_disable = true, .concurrent_bin_disable = true,
)); ));
if (CHIP == A8XX) {
fd6_lrz_inv<CHIP>(ctx, cs);
}
fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_COLOR); fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_COLOR);
fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_DEPTH); fd6_event_write<CHIP>(ctx, cs, FD_CCU_INVALIDATE_DEPTH);
fd6_event_write<CHIP>(ctx, cs, FD_LRZ_INVALIDATE); if (CHIP == A8XX) {
fd_pkt7(cs, CP_CCHE_INVALIDATE, 0);
} else {
fd6_event_write<CHIP>(ctx, cs, FD_LRZ_INVALIDATE);
}
fd6_event_write<CHIP>(ctx, cs, FD_CACHE_INVALIDATE); fd6_event_write<CHIP>(ctx, cs, FD_CACHE_INVALIDATE);
fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0); fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0);

View file

@ -214,7 +214,7 @@ __event_write(fd_cs &cs, enum fd_gpu_event event,
struct fd_gpu_event_info info = fd_gpu_events<CHIP>[event]; struct fd_gpu_event_info info = fd_gpu_events<CHIP>[event];
unsigned len = info.needs_seqno ? 4 : 1; unsigned len = info.needs_seqno ? 4 : 1;
if ((CHIP == A7XX) && (event == FD_RB_DONE)) if ((CHIP >= A7XX) && (event == FD_RB_DONE))
len--; len--;
fd_pkt7 pkt(cs, CP_EVENT_WRITE, len); fd_pkt7 pkt(cs, CP_EVENT_WRITE, len);
@ -222,7 +222,7 @@ __event_write(fd_cs &cs, enum fd_gpu_event event,
if (CHIP == A6XX) { if (CHIP == A6XX) {
pkt.add(CP_EVENT_WRITE_0_EVENT(info.raw_event) | pkt.add(CP_EVENT_WRITE_0_EVENT(info.raw_event) |
COND(info.needs_seqno, CP_EVENT_WRITE_0_TIMESTAMP)); COND(info.needs_seqno, CP_EVENT_WRITE_0_TIMESTAMP));
} else if (CHIP == A7XX) { } else if (CHIP >= A7XX) {
pkt.add(CP_EVENT_WRITE7_0_EVENT(info.raw_event) | pkt.add(CP_EVENT_WRITE7_0_EVENT(info.raw_event) |
CP_EVENT_WRITE7_0_WRITE_SRC(esrc) | CP_EVENT_WRITE7_0_WRITE_SRC(esrc) |
CP_EVENT_WRITE7_0_WRITE_DST(edst) | CP_EVENT_WRITE7_0_WRITE_DST(edst) |
@ -281,6 +281,28 @@ fd6_cache_inv(struct fd_context *ctx, fd_cs &cs)
fd6_event_write<CHIP>(ctx, cs, FD_CACHE_INVALIDATE); fd6_event_write<CHIP>(ctx, cs, FD_CACHE_INVALIDATE);
} }
/**
 * Emit an FD_LRZ_FLUSH event bracketed by temporary LRZ register state.
 *
 * Writes GRAS_LRZ_CNTL, GRAS_LRZ_CNTL2 and RB_LRZ_CNTL2 with LRZ enabled,
 * emits the FD_LRZ_FLUSH event, and then writes the same three registers
 * again with no fields set.
 *
 * NOTE(review): judging by the name, this sequence is presumably what
 * performs an LRZ invalidate; confirm against the hw documentation.
 *
 * @ctx: context passed through to fd6_event_write()
 * @cs:  command stream the register writes and the event are emitted into
 */
template <chip CHIP>
static inline void
fd6_lrz_inv(struct fd_context *ctx, fd_cs &cs)
{
/* Turn LRZ on for the duration of the flush (three register writes,
 * matching the size passed to with_crb):
 */
with_crb (cs, 3) {
crb.add(GRAS_LRZ_CNTL(CHIP, .enable = true));
crb.add(GRAS_LRZ_CNTL2(CHIP,
.disable_on_wrong_dir = true,
.fc_enable = true,
));
crb.add(RB_LRZ_CNTL2(CHIP));
}
/* The flush event is emitted while the state above is in effect: */
fd6_event_write<CHIP>(ctx, cs, FD_LRZ_FLUSH);
/* Rewrite the same registers with no fields set (presumably all-zero,
 * i.e. LRZ disabled again -- confirm the builders' default values):
 */
with_crb (cs, 3) {
crb.add(GRAS_LRZ_CNTL(CHIP));
crb.add(GRAS_LRZ_CNTL2(CHIP));
crb.add(RB_LRZ_CNTL2(CHIP));
}
}
template <chip CHIP> template <chip CHIP>
static inline void static inline void
fd6_set_rb_dbg_eco_mode(struct fd_context *ctx, fd_cs &cs, bool blit) fd6_set_rb_dbg_eco_mode(struct fd_context *ctx, fd_cs &cs, bool blit)
@ -311,8 +333,13 @@ template <chip CHIP>
static inline void static inline void
fd6_set_render_mode(fd_cs &cs, struct fd6_set_render_mode args) fd6_set_render_mode(fd_cs &cs, struct fd6_set_render_mode args)
{ {
fd_pkt7(cs, CP_SET_MARKER, 1) if (CHIP >= A8XX) {
.add(A6XX_CP_SET_MARKER_0(.mode = args.mode, .uses_gmem = args.uses_gmem)); fd_pkt7(cs, CP_SET_MARKER, 1)
.add(A8XX_CP_SET_MARKER_0(.mode = args.mode, .uses_gmem = args.uses_gmem));
} else {
fd_pkt7(cs, CP_SET_MARKER, 1)
.add(A6XX_CP_SET_MARKER_0(.mode = args.mode, .uses_gmem = args.uses_gmem));
}
} }
static inline bool static inline bool

View file

@ -364,6 +364,7 @@ use_hw_binning(struct fd_batch *batch)
(batch->num_draws > 0); (batch->num_draws > 0);
} }
template <chip CHIP>
static void static void
patch_fb_read_gmem(struct fd_batch *batch) patch_fb_read_gmem(struct fd_batch *batch)
{ {
@ -387,34 +388,70 @@ patch_fb_read_gmem(struct fd_batch *batch)
uint8_t swiz[4]; uint8_t swiz[4];
fdl6_format_swiz(psurf->format, false, swiz); fdl6_format_swiz(psurf->format, false, swiz);
uint64_t base = screen->gmem_base + gmem->cbuf_base[buf];
/* always TILE6_2 mode in GMEM, which also means no swap: */ /* always TILE6_2 mode in GMEM, which also means no swap: */
uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = { if (CHIP <= A7XX) {
A6XX_TEX_CONST_0_FMT(fd6_texture_format( uint64_t base = screen->gmem_base + gmem->cbuf_base[buf];
format, (enum a6xx_tile_mode)rsc->layout.tile_mode, false)) | uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = {
A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) | A6XX_TEX_CONST_0_FMT(fd6_texture_format(
A6XX_TEX_CONST_0_SWAP(WZYX) | format, (enum a6xx_tile_mode)rsc->layout.tile_mode, false)) |
A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) | A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) | A6XX_TEX_CONST_0_SWAP(WZYX) |
A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) | A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) |
A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) | COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) | A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) |
A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])), A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) |
A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) |
A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])),
A6XX_TEX_CONST_1_WIDTH(pfb->width) | A6XX_TEX_CONST_1_WIDTH(pfb->width) |
A6XX_TEX_CONST_1_HEIGHT(pfb->height), A6XX_TEX_CONST_1_HEIGHT(pfb->height),
A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[buf]) | A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[buf]) |
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D), A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D),
A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size), A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size),
A6XX_TEX_CONST_4_BASE_LO(base), A6XX_TEX_CONST_4_BASE_LO(base),
A6XX_TEX_CONST_5_BASE_HI(base >> 32) | A6XX_TEX_CONST_5_BASE_HI(base >> 32) |
A6XX_TEX_CONST_5_DEPTH(prsc->array_size) A6XX_TEX_CONST_5_DEPTH(prsc->array_size)
}; };
memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4); memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4);
} else if (CHIP >= A8XX) {
/* base address is simply GMEM location when using GMEM tiling: */
uint64_t base = gmem->cbuf_base[buf];
uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = {
A8XX_TEX_MEMOBJ_0_BASE_LO(base),
A8XX_TEX_MEMOBJ_1_BASE_HI(base >> 32) |
A8XX_TEX_MEMOBJ_1_TYPE(A6XX_TEX_2D) |
A8XX_TEX_MEMOBJ_1_DEPTH(prsc->array_size),
A8XX_TEX_MEMOBJ_2_WIDTH(pfb->width) |
A8XX_TEX_MEMOBJ_2_HEIGHT(pfb->height) |
A8XX_TEX_MEMOBJ_2_SAMPLES(fd_msaa_samples(prsc->nr_samples)),
A8XX_TEX_MEMOBJ_3_FMT(fd6_texture_format(
format, (enum a6xx_tile_mode)rsc->layout.tile_mode, false)) |
A8XX_TEX_MEMOBJ_3_SWAP(WZYX) |
A8XX_TEX_MEMOBJ_3_SWIZ_X(fdl8_swiz(swiz[0])) |
A8XX_TEX_MEMOBJ_3_SWIZ_Y(fdl8_swiz(swiz[1])) |
A8XX_TEX_MEMOBJ_3_SWIZ_Z(fdl8_swiz(swiz[2])) |
A8XX_TEX_MEMOBJ_3_SWIZ_W(fdl8_swiz(swiz[3])),
A8XX_TEX_MEMOBJ_4_TILE_MODE(TILE6_2) |
COND(util_format_is_srgb(format), A8XX_TEX_MEMOBJ_4_SRGB),
0,
A8XX_TEX_MEMOBJ_6_TEX_LINE_OFFSET(gmem->bin_w * gmem->cbuf_cpp[buf] * 8) | /* in bits */
A8XX_TEX_MEMOBJ_6_MIN_LINE_OFFSET(0),
A8XX_TEX_MEMOBJ_7_ARRAY_SLICE_OFFSET(rsc->layout.layer_size),
};
memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4);
}
} }
util_dynarray_clear(&batch->fb_read_patches); util_dynarray_clear(&batch->fb_read_patches);
@ -897,7 +934,7 @@ struct bin_size_params {
enum a6xx_lrz_feedback_mask lrz_feedback_zmode_mask; enum a6xx_lrz_feedback_mask lrz_feedback_zmode_mask;
}; };
/* nregs: 4 */ /* nregs: 17 */
template <chip CHIP> template <chip CHIP>
static void static void
set_bin_size(fd_crb &crb, const struct fd_gmem_stateobj *gmem, struct bin_size_params p) set_bin_size(fd_crb &crb, const struct fd_gmem_stateobj *gmem, struct bin_size_params p)
@ -905,22 +942,14 @@ set_bin_size(fd_crb &crb, const struct fd_gmem_stateobj *gmem, struct bin_size_p
unsigned w = gmem ? gmem->bin_w : 0; unsigned w = gmem ? gmem->bin_w : 0;
unsigned h = gmem ? gmem->bin_h : 0; unsigned h = gmem ? gmem->bin_h : 0;
if (CHIP == A6XX) { crb.add(GRAS_SC_BIN_CNTL(CHIP,
crb.add(GRAS_SC_BIN_CNTL(CHIP, .binw = w, .binh = h,
.binw = w, .binh = h, .render_mode = p.render_mode,
.render_mode = p.render_mode, .force_lrz_write_dis = p.force_lrz_write_dis,
.force_lrz_write_dis = p.force_lrz_write_dis, .buffers_location = p.buffers_location,
.buffers_location = p.buffers_location, .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
.lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask, .force_bi_dir_lrz_disable = true,
)); ));
} else {
crb.add(GRAS_SC_BIN_CNTL(CHIP,
.binw = w, .binh = h,
.render_mode = p.render_mode,
.force_lrz_write_dis = p.force_lrz_write_dis,
.lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
));
}
crb.add(RB_CNTL( crb.add(RB_CNTL(
CHIP, CHIP,
.binw = w, .binh = h, .binw = w, .binh = h,
@ -930,6 +959,28 @@ set_bin_size(fd_crb &crb, const struct fd_gmem_stateobj *gmem, struct bin_size_p
.lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask, .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
)); ));
crb.add(A6XX_VFD_RENDER_MODE(p.render_mode)); crb.add(A6XX_VFD_RENDER_MODE(p.render_mode));
if (CHIP >= A8XX) {
crb.add(TPL1_BIN_SIZE(CHIP, .binw = w, .binh = h));
crb.add(TPL1_A2D_BIN_SIZE(CHIP, .binw = w, .binh = h));
crb.add(SP_BIN_SIZE(CHIP, .binw = w, .binh = h));
for (int i = 0; i < 8; i++) {
crb.add(RB_MRT_GMEM_DIMENSION_REG(CHIP, i,
.width = COND(gmem && gmem->cbuf_cpp[i], w),
.height = COND(gmem && gmem->cbuf_cpp[i], h),
));
}
crb.add(RB_DEPTH_GMEM_DIMENSION(CHIP,
.width = COND(gmem && gmem->zsbuf_cpp[0], w),
.height = COND(gmem && gmem->zsbuf_cpp[0], h),
));
crb.add(RB_STENCIL_GMEM_DIMENSION(CHIP,
.width = COND(gmem && (gmem->zsbuf_cpp[0] || gmem->zsbuf_cpp[1]), w),
.height = COND(gmem && (gmem->zsbuf_cpp[0] || gmem->zsbuf_cpp[1]), h),
));
}
/* no flag for RB_RESOLVE_CNTL_3... */ /* no flag for RB_RESOLVE_CNTL_3... */
crb.add(RB_RESOLVE_CNTL_3(CHIP, .binw = w, .binh = h)); crb.add(RB_RESOLVE_CNTL_3(CHIP, .binw = w, .binh = h));
} }
@ -1044,6 +1095,7 @@ template <chip CHIP>
static void static void
fd7_emit_static_binning_regs(fd_cs &cs, bool gmem) fd7_emit_static_binning_regs(fd_cs &cs, bool gmem)
{ {
bool full_in_gmem = false; /* gen8 TODO */
bool sysmem = !gmem; bool sysmem = !gmem;
fd_ncrb<CHIP> ncrb(cs, 4); fd_ncrb<CHIP> ncrb(cs, 4);
@ -1060,6 +1112,16 @@ fd7_emit_static_binning_regs(fd_cs &cs, bool gmem)
.rt5_sysmem = sysmem, .rt5_sysmem = sysmem,
.rt6_sysmem = sysmem, .rt6_sysmem = sysmem,
.rt7_sysmem = sysmem, .rt7_sysmem = sysmem,
.z_full_in_gmem = full_in_gmem,
.s_full_in_gmem = full_in_gmem,
.rt0_full_in_gmem = full_in_gmem,
.rt1_full_in_gmem = full_in_gmem,
.rt2_full_in_gmem = full_in_gmem,
.rt3_full_in_gmem = full_in_gmem,
.rt4_full_in_gmem = full_in_gmem,
.rt5_full_in_gmem = full_in_gmem,
.rt6_full_in_gmem = full_in_gmem,
.rt7_full_in_gmem = full_in_gmem,
)); ));
ncrb.add(GRAS_LRZ_CB_CNTL(CHIP, 0x0)); ncrb.add(GRAS_LRZ_CB_CNTL(CHIP, 0x0));
ncrb.add(GRAS_MODE_CNTL(CHIP, 0x2)); ncrb.add(GRAS_MODE_CNTL(CHIP, 0x2));
@ -1163,7 +1225,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
emit_msaa<CHIP>(crb, pfb->samples); emit_msaa<CHIP>(crb, pfb->samples);
} }
patch_fb_read_gmem(batch); patch_fb_read_gmem<CHIP>(batch);
if (CHIP >= A7XX) { if (CHIP >= A7XX) {
fd7_emit_static_binning_regs<CHIP>(cs, true); fd7_emit_static_binning_regs<CHIP>(cs, true);
@ -1171,7 +1233,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
if (use_hw_binning(batch)) { if (use_hw_binning(batch)) {
/* enable stream-out during binning pass: */ /* enable stream-out during binning pass: */
with_crb (cs, 5) { with_crb (cs, 18) {
crb.add(VPC_SO_OVERRIDE(CHIP, false)); crb.add(VPC_SO_OVERRIDE(CHIP, false));
set_bin_size<CHIP>(crb, gmem, { set_bin_size<CHIP>(crb, gmem, {
@ -1184,7 +1246,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
update_render_cntl<CHIP>(cs, screen, pfb, true); update_render_cntl<CHIP>(cs, screen, pfb, true);
emit_binning_pass<CHIP>(cs, batch); emit_binning_pass<CHIP>(cs, batch);
with_crb (cs, 5) { with_crb (cs, 18) {
/* and disable stream-out for draw pass: */ /* and disable stream-out for draw pass: */
crb.add(VPC_SO_OVERRIDE(CHIP, true)); crb.add(VPC_SO_OVERRIDE(CHIP, true));
@ -1219,7 +1281,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
control_ptr(fd6_context(batch->ctx), vsc_state) control_ptr(fd6_context(batch->ctx), vsc_state)
)); ));
} else { } else {
with_crb (cs, 5) { with_crb (cs, 18) {
/* no binning pass, so enable stream-out for draw pass: */ /* no binning pass, so enable stream-out for draw pass: */
crb.add(VPC_SO_OVERRIDE(CHIP, false)); crb.add(VPC_SO_OVERRIDE(CHIP, false));
@ -1240,7 +1302,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
emit_common_init<CHIP>(cs, batch); emit_common_init<CHIP>(cs, batch);
} }
/* nregs: 4 */ /* nregs: 5 */
template <chip CHIP> template <chip CHIP>
static void static void
set_window_offset(fd_crb &crb, uint32_t x1, uint32_t y1) set_window_offset(fd_crb &crb, uint32_t x1, uint32_t y1)
@ -1249,6 +1311,8 @@ set_window_offset(fd_crb &crb, uint32_t x1, uint32_t y1)
crb.add(A6XX_RB_RESOLVE_WINDOW_OFFSET(.x = x1, .y = y1)); crb.add(A6XX_RB_RESOLVE_WINDOW_OFFSET(.x = x1, .y = y1));
crb.add(SP_WINDOW_OFFSET(CHIP, .x = x1, .y = y1)); crb.add(SP_WINDOW_OFFSET(CHIP, .x = x1, .y = y1));
crb.add(A6XX_TPL1_WINDOW_OFFSET(.x = x1, .y = y1)); crb.add(A6XX_TPL1_WINDOW_OFFSET(.x = x1, .y = y1));
if (CHIP >= A8XX)
crb.add(TPL1_A2D_WINDOW_OFFSET(CHIP, .x = x1, .y = y1));
} }
/* before mem2gmem */ /* before mem2gmem */
@ -1310,7 +1374,7 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
fd_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1) fd_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1)
.add(0x0); .add(0x0);
with_crb (cs, 5) { with_crb (cs, 18) {
crb.add(VPC_SO_OVERRIDE(CHIP, true)); crb.add(VPC_SO_OVERRIDE(CHIP, true));
/* /*
@ -1340,7 +1404,7 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
.add(VPC_SO_OVERRIDE(CHIP, false)); .add(VPC_SO_OVERRIDE(CHIP, false));
} }
with_crb (cs, 8) { with_crb (cs, 22) {
set_window_offset<CHIP>(crb, x1, y1); set_window_offset<CHIP>(crb, x1, y1);
set_bin_size<CHIP>(crb, gmem, { set_bin_size<CHIP>(crb, gmem, {
@ -1366,8 +1430,8 @@ set_blit_scissor(struct fd_batch *batch, fd_cs &cs)
blit_scissor.minx = 0; blit_scissor.minx = 0;
blit_scissor.miny = 0; blit_scissor.miny = 0;
blit_scissor.maxx = align(pfb->width, 16); blit_scissor.maxx = MAX2(1, align(pfb->width, 16));
blit_scissor.maxy = align(pfb->height, 4); blit_scissor.maxy = MAX2(1, align(pfb->height, 4));
fd_pkt4(cs, 2) fd_pkt4(cs, 2)
.add(A6XX_RB_RESOLVE_CNTL_1(.x = blit_scissor.minx, .y = blit_scissor.miny)) .add(A6XX_RB_RESOLVE_CNTL_1(.x = blit_scissor.minx, .y = blit_scissor.miny))
@ -1947,8 +2011,7 @@ emit_sysmem_clears(fd_cs &cs, struct fd_batch *batch, struct fd_batch_subpass *s
uint32_t buffers = subpass->fast_cleared; uint32_t buffers = subpass->fast_cleared;
if (!buffers) assert(buffers);
return;
struct pipe_box box2d; struct pipe_box box2d;
u_box_2d(0, 0, pfb->width, pfb->height, &box2d); u_box_2d(0, 0, pfb->width, pfb->height, &box2d);
@ -2037,6 +2100,7 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
set_scissor<CHIP>(cs, 0, 0, pfb->width - 1, pfb->height - 1); set_scissor<CHIP>(cs, 0, 0, pfb->width - 1, pfb->height - 1);
else else
set_scissor<CHIP>(cs, 0, 0, 0, 0); set_scissor<CHIP>(cs, 0, 0, 0, 0);
set_blit_scissor(batch, cs);
set_tessfactor_bo<CHIP>(cs, batch); set_tessfactor_bo<CHIP>(cs, batch);
@ -2046,12 +2110,15 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
.add(GRAS_MODE_CNTL(CHIP, 0x2)); .add(GRAS_MODE_CNTL(CHIP, 0x2));
} }
with_crb (cs, 11) { with_crb (cs, 25) {
set_window_offset<CHIP>(crb, 0, 0); set_window_offset<CHIP>(crb, 0, 0);
set_bin_size<CHIP>(crb, NULL, { set_bin_size<CHIP>(crb, NULL, {
.render_mode = RENDERING_PASS, .render_mode = RENDERING_PASS,
.buffers_location = BUFFERS_IN_SYSMEM, .buffers_location = BUFFERS_IN_SYSMEM,
.lrz_feedback_zmode_mask = batch->ctx->screen->info->props.has_lrz_feedback
? LRZ_FEEDBACK_EARLY_Z_LATE_Z
: LRZ_FEEDBACK_NONE,
}); });
if (CHIP >= A7XX) { if (CHIP >= A7XX) {
@ -2113,7 +2180,7 @@ fd6_emit_sysmem(struct fd_batch *batch)
foreach_subpass (subpass, batch) { foreach_subpass (subpass, batch) {
if (subpass->fast_cleared) { if (subpass->fast_cleared) {
unsigned flushes = 0; unsigned flushes = FD6_INVALIDATE_CCHE | FD6_WAIT_FOR_IDLE;
if (subpass->fast_cleared & FD_BUFFER_COLOR) if (subpass->fast_cleared & FD_BUFFER_COLOR)
flushes |= FD6_INVALIDATE_CCU_COLOR; flushes |= FD6_INVALIDATE_CCU_COLOR;
if (subpass->fast_cleared & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) if (subpass->fast_cleared & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
@ -2121,6 +2188,7 @@ fd6_emit_sysmem(struct fd_batch *batch)
fd6_emit_flushes<CHIP>(batch->ctx, cs, flushes); fd6_emit_flushes<CHIP>(batch->ctx, cs, flushes);
emit_sysmem_clears<CHIP>(cs, batch, subpass); emit_sysmem_clears<CHIP>(cs, batch, subpass);
fd6_emit_flushes<CHIP>(batch->ctx, cs, flushes);
fd6_set_render_mode<CHIP>(cs, {RM6_DIRECT_RENDER}); fd6_set_render_mode<CHIP>(cs, {RM6_DIRECT_RENDER});
} }
@ -2154,7 +2222,11 @@ fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt
fd6_emit_flushes<CHIP>(batch->ctx, cs, fd6_emit_flushes<CHIP>(batch->ctx, cs,
FD6_FLUSH_CCU_COLOR | FD6_FLUSH_CCU_COLOR |
FD6_FLUSH_CCU_DEPTH); FD6_INVALIDATE_CCU_COLOR |
FD6_FLUSH_CCU_DEPTH |
FD6_INVALIDATE_CCU_DEPTH |
FD6_INVALIDATE_CCHE |
FD6_WAIT_FOR_IDLE);
} }
template <chip CHIP> template <chip CHIP>

View file

@ -283,7 +283,7 @@ fd6_build_bindless_state(struct fd_context *ctx, mesa_shader_stage shader,
} }
} }
if (bufso->enabled_mask) { if ((CHIP < A8XX) && bufso->enabled_mask) {
fd_pkt7(cs, CP_LOAD_STATE6_FRAG, 3) fd_pkt7(cs, CP_LOAD_STATE6_FRAG, 3)
.add(CP_LOAD_STATE6_0( .add(CP_LOAD_STATE6_0(
.dst_off = IR3_BINDLESS_SSBO_OFFSET, .dst_off = IR3_BINDLESS_SSBO_OFFSET,
@ -299,7 +299,7 @@ fd6_build_bindless_state(struct fd_context *ctx, mesa_shader_stage shader,
)); ));
} }
if (imgso->enabled_mask) { if ((CHIP < A8XX) && imgso->enabled_mask) {
fd_pkt7(cs, CP_LOAD_STATE6_FRAG, 3) fd_pkt7(cs, CP_LOAD_STATE6_FRAG, 3)
.add(CP_LOAD_STATE6_0( .add(CP_LOAD_STATE6_0(
.dst_off = IR3_BINDLESS_IMAGE_OFFSET, .dst_off = IR3_BINDLESS_IMAGE_OFFSET,
@ -329,7 +329,7 @@ fd6_build_bindless_state(struct fd_context *ctx, mesa_shader_stage shader,
} }
} }
if (bufso->enabled_mask) { if ((CHIP < A8XX) && bufso->enabled_mask) {
fd_pkt7(cs, CP_LOAD_STATE6, 3) fd_pkt7(cs, CP_LOAD_STATE6, 3)
.add(CP_LOAD_STATE6_0( .add(CP_LOAD_STATE6_0(
.dst_off = IR3_BINDLESS_SSBO_OFFSET, .dst_off = IR3_BINDLESS_SSBO_OFFSET,
@ -345,7 +345,7 @@ fd6_build_bindless_state(struct fd_context *ctx, mesa_shader_stage shader,
)); ));
} }
if (imgso->enabled_mask) { if ((CHIP < A8XX) && imgso->enabled_mask) {
fd_pkt7(cs, CP_LOAD_STATE6, 3) fd_pkt7(cs, CP_LOAD_STATE6, 3)
.add(CP_LOAD_STATE6_0( .add(CP_LOAD_STATE6_0(
.dst_off = IR3_BINDLESS_IMAGE_OFFSET, .dst_off = IR3_BINDLESS_IMAGE_OFFSET,

View file

@ -47,7 +47,7 @@ template <chip CHIP>
static void static void
emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_variant *so) emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_variant *so)
{ {
fd_crb crb(cs, 11); fd_crb crb(cs, 12);
mesa_shader_stage type = so->type; mesa_shader_stage type = so->type;
if (type == MESA_SHADER_KERNEL) if (type == MESA_SHADER_KERNEL)
@ -171,6 +171,12 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari
.earlypreamble = so->early_preamble, .earlypreamble = so->early_preamble,
.mergedregs = so->mergedregs, .mergedregs = so->mergedregs,
)); ));
if (CHIP >= A8XX) {
crb.add(RB_PS_CNTL(CHIP,
.pixlodenable = so->need_pixlod,
.lodpixmask = so->need_full_quad,
));
}
crb.add(A6XX_SP_PS_INSTR_SIZE(so->instrlen)); crb.add(A6XX_SP_PS_INSTR_SIZE(so->instrlen));
crb.add(A6XX_SP_PS_PROGRAM_COUNTER_OFFSET()); crb.add(A6XX_SP_PS_PROGRAM_COUNTER_OFFSET());
crb.add(A6XX_SP_PS_BASE(so->bo)); crb.add(A6XX_SP_PS_BASE(so->bo));
@ -464,6 +470,7 @@ primitive_to_tess(enum mesa_prim primitive)
#define MAX_VERTEX_ATTRIBS 32 #define MAX_VERTEX_ATTRIBS 32
template <chip CHIP>
static void static void
emit_vfd_dest(fd_crb &crb, const struct ir3_shader_variant *vs) emit_vfd_dest(fd_crb &crb, const struct ir3_shader_variant *vs)
{ {
@ -478,6 +485,26 @@ emit_vfd_dest(fd_crb &crb, const struct ir3_shader_variant *vs)
.decode_cnt = attr_count .decode_cnt = attr_count
)); ));
if (CHIP >= A8XX) {
const uint32_t vertexid_regid =
ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
const uint32_t instanceid_regid =
ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID);
/* Note: we currently don't support multiview.
*/
const uint32_t viewid_regid = INVALID_REG;
unsigned sideband_count =
(vertexid_regid != INVALID_REG) +
(instanceid_regid != INVALID_REG) +
(viewid_regid != INVALID_REG);
crb.add(PC_VS_INPUT_CNTL(CHIP,
.instr_cnt = attr_count,
.sideband_cnt = sideband_count,
));
}
for (uint32_t i = 0; i < attr_count; i++) { for (uint32_t i = 0; i < attr_count; i++) {
assert(!vs->inputs[i].sysval); assert(!vs->inputs[i].sysval);
crb.add(A6XX_VFD_DEST_CNTL_INSTR(i, crb.add(A6XX_VFD_DEST_CNTL_INSTR(i,
@ -1046,7 +1073,7 @@ emit_fs_inputs(fd_crb &crb, const struct program_builder *b)
)); ));
} }
if (CHIP == A7XX) { if (CHIP >= A7XX) {
for (int i = 0; i < fs->num_sampler_prefetch; i++) { for (int i = 0; i < fs->num_sampler_prefetch; i++) {
const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i]; const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
crb.add(A6XX_SP_PS_INITIAL_TEX_INDEX_CMD(i, crb.add(A6XX_SP_PS_INITIAL_TEX_INDEX_CMD(i,
@ -1057,6 +1084,10 @@ emit_fs_inputs(fd_crb &crb, const struct program_builder *b)
} }
crb.add(SP_LB_PARAM_LIMIT(CHIP, b->ctx->screen->info->props.prim_alloc_threshold)); crb.add(SP_LB_PARAM_LIMIT(CHIP, b->ctx->screen->info->props.prim_alloc_threshold));
if (CHIP == A8XX)
crb.add(RB_LB_PARAM_LIMIT(CHIP, b->ctx->screen->info->props.prim_alloc_threshold));
crb.add(SP_REG_PROG_ID_0(CHIP, crb.add(SP_REG_PROG_ID_0(CHIP,
.faceregid = face_regid, .faceregid = face_regid,
.sampleid = samp_id_regid, .sampleid = samp_id_regid,
@ -1246,7 +1277,7 @@ setup_stateobj(fd_cs &cs, const struct program_builder *b)
crb.add(PC_STEREO_RENDERING_CNTL(CHIP)); crb.add(PC_STEREO_RENDERING_CNTL(CHIP));
emit_vfd_dest(crb, b->vs); emit_vfd_dest<CHIP>(crb, b->vs);
emit_vpc<CHIP>(crb, b); emit_vpc<CHIP>(crb, b);
emit_fs_inputs<CHIP>(crb, b); emit_fs_inputs<CHIP>(crb, b);

View file

@ -92,6 +92,10 @@ template <chip CHIP>
static inline bool static inline bool
fd6_load_shader_consts_via_preamble(const struct ir3_shader_variant *v) fd6_load_shader_consts_via_preamble(const struct ir3_shader_variant *v)
{ {
if (CHIP == A8XX) {
assert(v->compiler->load_shader_consts_via_preamble);
return true;
}
return (CHIP == A7XX) && v->compiler->load_shader_consts_via_preamble; return (CHIP == A7XX) && v->compiler->load_shader_consts_via_preamble;
} }
@ -99,6 +103,10 @@ template <chip CHIP>
static inline bool static inline bool
fd6_load_inline_uniforms_via_preamble_ldgk(const struct ir3_shader_variant *v) fd6_load_inline_uniforms_via_preamble_ldgk(const struct ir3_shader_variant *v)
{ {
if (CHIP == A8XX) {
assert(v->compiler->load_inline_uniforms_via_preamble_ldgk);
return true;
}
return (CHIP == A7XX) && v->compiler->load_inline_uniforms_via_preamble_ldgk; return (CHIP == A7XX) && v->compiler->load_inline_uniforms_via_preamble_ldgk;
} }

View file

@ -36,7 +36,10 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
psize_max = cso->point_size; psize_max = cso->point_size;
} }
unsigned nreg = (CHIP >= A7XX) ? 46 : 15; unsigned nreg = (CHIP >= A7XX) ? 76 : 15;
if (CHIP >= A8XX)
nreg++;
fd_crb crb(ctx->pipe, nreg); fd_crb crb(ctx->pipe, nreg);
crb.add(GRAS_CL_CNTL(CHIP, crb.add(GRAS_CL_CNTL(CHIP,
@ -94,7 +97,10 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
crb.add(VPC_RAST_CNTL(CHIP, mode)); crb.add(VPC_RAST_CNTL(CHIP, mode));
crb.add(PC_DGEN_RAST_CNTL(CHIP, mode)); crb.add(PC_DGEN_RAST_CNTL(CHIP, mode));
if (CHIP == A7XX || if (CHIP >= A8XX)
crb.add(GRAS_RAST_CNTL(CHIP, mode));
if (CHIP >= A7XX ||
(CHIP == A6XX && ctx->screen->info->props.is_a702)) { (CHIP == A6XX && ctx->screen->info->props.is_a702)) {
crb.add(VPC_PS_RAST_CNTL(CHIP, mode)); crb.add(VPC_PS_RAST_CNTL(CHIP, mode));
} }
@ -113,10 +119,17 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx,
for (unsigned i = 0; i < num_viewports; i++) { for (unsigned i = 0; i < num_viewports; i++) {
crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MIN(CHIP, i, 0.0f)); crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MIN(CHIP, i, 0.0f));
crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MAX(CHIP, i, 1.0f)); crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MAX(CHIP, i, 1.0f));
if (CHIP >= A8XX) {
crb.add(RB_VIEWPORT_ZCLAMP_MIN_REG(CHIP, i, 0.0f));
crb.add(RB_VIEWPORT_ZCLAMP_MAX_REG(CHIP, i, 1.0f));
}
} }
crb.add(RB_VIEWPORT_ZCLAMP_MIN(CHIP, 0.0f)); if (CHIP <= A7XX) {
crb.add(RB_VIEWPORT_ZCLAMP_MAX(CHIP, 1.0f)); crb.add(RB_VIEWPORT_ZCLAMP_MIN(CHIP, 0.0f));
crb.add(RB_VIEWPORT_ZCLAMP_MAX(CHIP, 1.0f));
}
} }
if (CHIP == A6XX && ctx->screen->info->props.has_legacy_pipeline_shading_rate) { if (CHIP == A6XX && ctx->screen->info->props.has_legacy_pipeline_shading_rate) {

View file

@ -162,10 +162,40 @@ static const enum pc_di_primtype primtypes[] = {
}; };
/* clang-format on */ /* clang-format on */
/**
 * Assign the offsets of the per-CCU buffers in @config, packing them
 * downward from the end of gmem, and return the lowest offset assigned.
 *
 * Regions are placed in this order, highest address first:
 *
 *    vpc_bv_pos_buf, vpc_attr_buf, vpc_pos_buf, color_ccu, depth_ccu
 *
 * Each region is sized as (num_ccu * the matching per-CCU *_size field in
 * @config), so the *_size fields must be filled in before calling this.
 *
 * @screen  supplies gmemsize_bytes (the starting point) and info->num_ccu
 * @config  *_size fields are read, *_offset fields are written
 * @return  config->depth_ccu_offset, i.e. the start of the lowest region
 */
static unsigned
calc_gmem_cache_offsets(struct fd_screen *screen, struct fd6_gmem_config *config)
{
   const unsigned ccu_count = screen->info->num_ccu;

   /* Walk backwards from the end of gmem: */
   unsigned cursor = screen->gmemsize_bytes;

   /* NOTE(review): the purpose of this fixed 0x78000 byte carve-out at the
    * top of gmem is not explained (the original comment was just "????").
    * Confirm against hw documentation before changing it.
    */
   cursor -= 0x78000;

   /* Each step carves the next region off just below the previous one and
    * continues from the offset that was stored into @config.
    */
   cursor = config->vpc_bv_pos_buf_offset =
      cursor - (ccu_count * config->vpc_bv_pos_buf_size);

   cursor = config->vpc_attr_buf_offset =
      cursor - (ccu_count * config->vpc_attr_buf_size);

   cursor = config->vpc_pos_buf_offset =
      cursor - (ccu_count * config->vpc_pos_buf_size);

   cursor = config->color_ccu_offset =
      cursor - (ccu_count * config->color_cache_size);

   cursor = config->depth_ccu_offset =
      cursor - (ccu_count * config->depth_cache_size);

   return cursor;
}
void void
fd6_screen_init(struct pipe_screen *pscreen) fd6_screen_init(struct pipe_screen *pscreen)
{ {
struct fd_screen *screen = fd_screen(pscreen); struct fd_screen *screen = fd_screen(pscreen);
const struct fd_dev_info *info = screen->info;
screen->max_rts = A6XX_MAX_RENDER_TARGETS; screen->max_rts = A6XX_MAX_RENDER_TARGETS;
@ -183,7 +213,27 @@ fd6_screen_init(struct pipe_screen *pscreen)
sysmem->depth_ccu_offset = 0; sysmem->depth_ccu_offset = 0;
sysmem->color_ccu_offset = sysmem->depth_ccu_offset + depth_cache_size; sysmem->color_ccu_offset = sysmem->depth_ccu_offset + depth_cache_size;
if (screen->info->props.has_gmem_vpc_attr_buf) { /* TODO we could unify gen7/gen8 setup.. gen7 is a subset.. */
if (info->chip == 8) {
gmem->depth_cache_fraction = info->props.gmem_ccu_depth_cache_fraction;
gmem->depth_cache_size = info->props.gmem_per_ccu_depth_cache_size;
gmem->color_cache_fraction = info->props.gmem_ccu_color_cache_fraction;
gmem->color_cache_size = info->props.gmem_per_ccu_color_cache_size;
gmem->vpc_attr_buf_size = info->props.gmem_vpc_attr_buf_size;
gmem->vpc_pos_buf_size = info->props.gmem_vpc_pos_buf_size;
gmem->vpc_bv_pos_buf_size = info->props.gmem_vpc_bv_pos_buf_size;
sysmem->depth_cache_fraction = info->props.sysmem_ccu_depth_cache_fraction;
sysmem->depth_cache_size = info->props.sysmem_per_ccu_depth_cache_size;
sysmem->color_cache_fraction = info->props.sysmem_ccu_color_cache_fraction;
sysmem->color_cache_size = info->props.sysmem_per_ccu_color_cache_size;
sysmem->vpc_attr_buf_size = info->props.sysmem_vpc_attr_buf_size;
sysmem->vpc_pos_buf_size = info->props.sysmem_vpc_pos_buf_size;
sysmem->vpc_bv_pos_buf_size = info->props.sysmem_vpc_bv_pos_buf_size;
calc_gmem_cache_offsets(screen, sysmem);
screen->gmemsize_bytes = calc_gmem_cache_offsets(screen, gmem);
} else if (screen->info->props.has_gmem_vpc_attr_buf) {
sysmem->vpc_attr_buf_size = screen->info->props.sysmem_vpc_attr_buf_size; sysmem->vpc_attr_buf_size = screen->info->props.sysmem_vpc_attr_buf_size;
sysmem->vpc_attr_buf_offset = sysmem->color_ccu_offset + color_cache_size; sysmem->vpc_attr_buf_offset = sysmem->color_ccu_offset + color_cache_size;

View file

@ -250,6 +250,7 @@ get_bcolor_offset(struct fd_context *ctx, const struct pipe_sampler_state *sampl
return idx; return idx;
} }
template <chip CHIP>
static void * static void *
fd6_sampler_state_create(struct pipe_context *pctx, fd6_sampler_state_create(struct pipe_context *pctx,
const struct pipe_sampler_state *cso) const struct pipe_sampler_state *cso)
@ -276,28 +277,67 @@ fd6_sampler_state_create(struct pipe_context *pctx,
cso->min_img_filter == PIPE_TEX_FILTER_LINEAR; cso->min_img_filter == PIPE_TEX_FILTER_LINEAR;
bool needs_border = false; bool needs_border = false;
so->descriptor[0] =
COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A6XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A6XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A6XX_TEX_SAMP_0_ANISO((enum a6xx_tex_aniso)aniso) |
A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &needs_border)) |
A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &needs_border)) |
A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &needs_border)) |
A6XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
so->descriptor[1] = if (CHIP <= A7XX) {
COND(cso->compare_mode, A6XX_TEX_SAMP_1_COMPARE_FUNC((enum adreno_compare_func)cso->compare_func)) | so->descriptor[0] =
COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE, COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) | A6XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | A6XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
COND(cso->unnormalized_coords, A6XX_TEX_SAMP_1_UNNORM_COORDS) | A6XX_TEX_SAMP_0_ANISO((enum a6xx_tex_aniso)aniso) |
A6XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &needs_border)) |
A6XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &needs_border)) |
A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &needs_border)) |
A6XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
so->descriptor[2] = so->descriptor[1] =
A6XX_TEX_SAMP_2_REDUCTION_MODE(reduction_mode(cso->reduction_mode)) | COND(cso->compare_mode, A6XX_TEX_SAMP_1_COMPARE_FUNC((enum adreno_compare_func)cso->compare_func)) |
COND(chroma_linear, A6XX_TEX_SAMP_2_CHROMA_LINEAR); COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE,
A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) |
COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(cso->unnormalized_coords, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
A6XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
A6XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
so->descriptor[2] =
A6XX_TEX_SAMP_2_REDUCTION_MODE(reduction_mode(cso->reduction_mode)) |
COND(chroma_linear, A6XX_TEX_SAMP_2_CHROMA_LINEAR);
} else if (CHIP >= A8XX) {
const float MAX_LOD = 4095.0f / 256.0f;
so->descriptor[0] =
COND(miplinear, A8XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE,
A8XX_TEX_SAMP_0_MIPMAPING_DIS) |
A8XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
A8XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
A8XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &needs_border)) |
A8XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &needs_border)) |
A8XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &needs_border)) |
A8XX_TEX_SAMP_0_LOD_BIAS(CLAMP(cso->lod_bias, -16, MAX_LOD)) |
A8XX_TEX_SAMP_0_ANISO((enum a6xx_tex_aniso)aniso);
float min_lod, max_lod;
if (cso->unnormalized_coords) {
min_lod = max_lod = 0;
} else if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
min_lod = CLAMP(cso->min_lod, 0.0f, MAX_LOD);
max_lod = CLAMP(cso->max_lod, 0.0f, MAX_LOD);
} else {
min_lod = CLAMP(cso->min_lod, 0.0f, 0.25f);
max_lod = CLAMP(cso->max_lod, 0.0f, 0.25f);
}
so->descriptor[1] =
A8XX_TEX_SAMP_1_MAX_LOD(max_lod) |
A8XX_TEX_SAMP_1_MIN_LOD(min_lod) |
A8XX_TEX_SAMP_1_REDUCTION_MODE(reduction_mode(cso->reduction_mode)) |
COND(cso->compare_mode, A8XX_TEX_SAMP_1_COMPARE_FUNC((enum adreno_compare_func)cso->compare_func)) |
COND(chroma_linear, A8XX_TEX_SAMP_1_CHROMA_LINEAR) |
COND(!cso->seamless_cube_map, A8XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
COND(cso->unnormalized_coords, A8XX_TEX_SAMP_1_UNNORM_COORDS);
}
if (needs_border) { if (needs_border) {
bool fast_border_color_enable = false; bool fast_border_color_enable = false;
@ -328,11 +368,20 @@ fd6_sampler_state_create(struct pipe_context *pctx,
fast_border_color = A6XX_BORDER_COLOR_1_1_1_1; fast_border_color = A6XX_BORDER_COLOR_1_1_1_1;
} }
if (fast_border_color_enable) { if (CHIP <= A7XX) {
so->descriptor[2] |= A6XX_TEX_SAMP_2_FASTBORDERCOLOR(fast_border_color) | if (fast_border_color_enable) {
A6XX_TEX_SAMP_2_FASTBORDERCOLOREN; so->descriptor[2] |= A6XX_TEX_SAMP_2_FASTBORDERCOLOR(fast_border_color) |
} else { A6XX_TEX_SAMP_2_FASTBORDERCOLOREN;
so->descriptor[2] |= A6XX_TEX_SAMP_2_BCOLOR(get_bcolor_offset(ctx, cso)); } else {
so->descriptor[2] |= A6XX_TEX_SAMP_2_BCOLOR(get_bcolor_offset(ctx, cso));
}
} else if (CHIP >= A8XX) {
if (fast_border_color_enable) {
so->descriptor[2] |= A8XX_TEX_SAMP_2_FASTBORDERCOLOR(fast_border_color) |
A8XX_TEX_SAMP_2_FASTBORDERCOLOREN;
} else {
so->descriptor[2] |= A8XX_TEX_SAMP_2_BCOLOR(get_bcolor_offset(ctx, cso));
}
} }
} }
@ -877,7 +926,7 @@ fd6_texture_init(struct pipe_context *pctx) disable_thread_safety_analysis
struct fd_context *ctx = fd_context(pctx); struct fd_context *ctx = fd_context(pctx);
struct fd6_context *fd6_ctx = fd6_context(ctx); struct fd6_context *fd6_ctx = fd6_context(ctx);
pctx->create_sampler_state = fd6_sampler_state_create; pctx->create_sampler_state = fd6_sampler_state_create<CHIP>;
pctx->delete_sampler_state = fd6_sampler_state_delete; pctx->delete_sampler_state = fd6_sampler_state_delete;
pctx->bind_sampler_states = fd_sampler_states_bind; pctx->bind_sampler_states = fd_sampler_states_bind;

View file

@ -187,7 +187,7 @@ fd6_zsa_state_create(struct pipe_context *pctx,
bool depth_clamp_enable = (i & FD6_ZSA_DEPTH_CLAMP); bool depth_clamp_enable = (i & FD6_ZSA_DEPTH_CLAMP);
bool no_alpha = (i & FD6_ZSA_NO_ALPHA); bool no_alpha = (i & FD6_ZSA_NO_ALPHA);
fd_crb crb(ctx->pipe, 9); fd_crb crb(ctx->pipe, 10);
crb.add(A6XX_RB_ALPHA_TEST_CNTL( crb.add(A6XX_RB_ALPHA_TEST_CNTL(
.alpha_ref = (uint32_t)(cso->alpha_ref_value * 255.0f) & 0xff, .alpha_ref = (uint32_t)(cso->alpha_ref_value * 255.0f) & 0xff,
@ -195,6 +195,12 @@ fd6_zsa_state_create(struct pipe_context *pctx,
.alpha_test_func = (enum adreno_compare_func)cso->alpha_func, .alpha_test_func = (enum adreno_compare_func)cso->alpha_func,
)); ));
if (CHIP >= A8XX) {
crb.add(SP_ALPHA_TEST_CNTL(CHIP,
.alpha_test = cso->alpha_enabled && !no_alpha,
));
}
crb.add(A6XX_RB_STENCIL_CNTL( crb.add(A6XX_RB_STENCIL_CNTL(
.stencil_enable = fs->enabled, .stencil_enable = fs->enabled,
.stencil_enable_bf = bs->enabled, .stencil_enable_bf = bs->enabled,
@ -220,6 +226,7 @@ fd6_zsa_state_create(struct pipe_context *pctx,
.z_clamp_enable = depth_clamp_enable || CHIP >= A7XX, .z_clamp_enable = depth_clamp_enable || CHIP >= A7XX,
.z_read_enable = cso->depth_enabled || cso->depth_bounds_test, .z_read_enable = cso->depth_enabled || cso->depth_bounds_test,
.z_bounds_enable = cso->depth_bounds_test, .z_bounds_enable = cso->depth_bounds_test,
.o_depth_01_clamp_en = CHIP >= A8XX,
)); ));
crb.add(GRAS_SU_DEPTH_CNTL(CHIP, cso->depth_enabled)); crb.add(GRAS_SU_DEPTH_CNTL(CHIP, cso->depth_enabled));

View file

@ -995,6 +995,11 @@ fd_screen_create(int fd,
screen->dev_info = info; screen->dev_info = info;
screen->info = &screen->dev_info; screen->info = &screen->dev_info;
if (screen->gen == 8) {
/* gen8 TODO */
fd_mesa_debug |= FD_DBG_NOLRZ;
}
switch (screen->gen) { switch (screen->gen) {
case 2: case 2:
fd2_screen_init(pscreen); fd2_screen_init(pscreen);
@ -1010,6 +1015,7 @@ fd_screen_create(int fd,
break; break;
case 6: case 6:
case 7: case 7:
case 8:
fd6_screen_init(pscreen); fd6_screen_init(pscreen);
break; break;
default: default:

View file

@ -60,6 +60,18 @@ struct fd6_gmem_config {
/* Vertex attrib cache (a750+): */ /* Vertex attrib cache (a750+): */
uint32_t vpc_attr_buf_size; uint32_t vpc_attr_buf_size;
uint32_t vpc_attr_buf_offset; uint32_t vpc_attr_buf_offset;
/* Vertex position cache (a8xx+): */
uint32_t vpc_pos_buf_size;
uint32_t vpc_pos_buf_offset;
uint32_t vpc_bv_pos_buf_size;
uint32_t vpc_bv_pos_buf_offset;
/* see enum a6xx_ccu_cache_size */
uint32_t depth_cache_fraction;
uint32_t color_cache_fraction;
uint32_t depth_cache_size;
uint32_t color_cache_size;
}; };
struct fd_screen { struct fd_screen {