From 11364f4ee2ed36b19505451b5944bbb790ef4c35 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 27 Oct 2025 12:37:54 -0700 Subject: [PATCH] freedreno: gen8 support Enable gen8 support. Sysmem, gmem, and binning work. DEQP gles2/3/31 tests are passing. LRZ is not supported yet, and will follow later. Signed-off-by: Rob Clark Part-of: --- .../drivers/freedreno/a6xx/fd6_barrier.cc | 3 + .../drivers/freedreno/a6xx/fd6_barrier.h | 8 +- .../drivers/freedreno/a6xx/fd6_blend.cc | 11 +- .../drivers/freedreno/a6xx/fd6_compute.cc | 2 +- .../drivers/freedreno/a6xx/fd6_context.cc | 6 +- .../drivers/freedreno/a6xx/fd6_draw.cc | 4 + .../drivers/freedreno/a6xx/fd6_emit.cc | 71 +++++-- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 35 +++- .../drivers/freedreno/a6xx/fd6_gmem.cc | 178 ++++++++++++------ .../drivers/freedreno/a6xx/fd6_image.cc | 8 +- .../drivers/freedreno/a6xx/fd6_program.cc | 37 +++- .../drivers/freedreno/a6xx/fd6_program.h | 8 + .../drivers/freedreno/a6xx/fd6_rasterizer.cc | 21 ++- .../drivers/freedreno/a6xx/fd6_screen.cc | 52 ++++- .../drivers/freedreno/a6xx/fd6_texture.cc | 101 +++++++--- src/gallium/drivers/freedreno/a6xx/fd6_zsa.cc | 9 +- .../drivers/freedreno/freedreno_screen.c | 6 + .../drivers/freedreno/freedreno_screen.h | 12 ++ 18 files changed, 455 insertions(+), 117 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc index 64a77e19c00..dc478ae0eeb 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc @@ -31,6 +31,9 @@ fd6_emit_flushes(struct fd_context *ctx, fd_cs &cs, unsigned flushes) if (flushes & FD6_INVALIDATE_CCU_DEPTH) fd6_event_write(ctx, cs, FD_CCU_INVALIDATE_DEPTH); + if ((CHIP >= A7XX) && (flushes & FD6_INVALIDATE_CCHE)) + fd_pkt7(cs, CP_CCHE_INVALIDATE, 0); + if (flushes & FD6_FLUSH_CACHE) fd6_event_write(ctx, cs, FD_CACHE_CLEAN); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.h 
b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.h index 017bb4a1f22..b53c26e0daf 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_barrier.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_barrier.h @@ -20,9 +20,11 @@ enum fd6_flush { FD6_INVALIDATE_CCU_DEPTH = BIT(3), FD6_FLUSH_CACHE = BIT(4), FD6_INVALIDATE_CACHE = BIT(5), - FD6_WAIT_MEM_WRITES = BIT(6), - FD6_WAIT_FOR_IDLE = BIT(7), - FD6_WAIT_FOR_ME = BIT(8), + FD6_INVALIDATE_CCHE = BIT(6), + + FD6_WAIT_MEM_WRITES = BIT(16), + FD6_WAIT_FOR_IDLE = BIT(17), + FD6_WAIT_FOR_ME = BIT(18), }; template diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.cc b/src/gallium/drivers/freedreno/a6xx/fd6_blend.cc index 31d54d2055c..769f802baeb 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.cc @@ -60,7 +60,7 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend, if (!so) return NULL; - unsigned nregs = (2 * A6XX_MAX_RENDER_TARGETS) + 3; + unsigned nregs = (3 * A6XX_MAX_RENDER_TARGETS) + 3; fd_crb crb(blend->ctx->pipe, nregs); for (unsigned i = 0; i <= cso->max_rt; i++) { @@ -87,6 +87,14 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend, .component_enable = rt->colormask, )); + if (CHIP == A8XX) { + crb.add(SP_MRT_BLEND_CNTL_REG(CHIP, i, + .color_blend_en = rt->blend_enable, + .alpha_blend_en = rt->blend_enable, + .component_write_mask = rt->colormask, + )); + } + if (rt->blend_enable) { mrt_blend |= (1 << i); } @@ -114,6 +122,7 @@ __fd6_setup_blend_variant(struct fd6_blend_stateobj *blend, .independent_blend_en = cso->independent_blend_enable, .dual_color_in_enable = blend->use_dual_src_blend, .alpha_to_coverage = cso->alpha_to_coverage, + .alpha_to_one = cso->alpha_to_one, )) .add(A6XX_RB_BLEND_CNTL( .blend_reads_dest = mrt_blend, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc index ec6cd7ff7d0..314db5ef580 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc 
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.cc @@ -38,7 +38,7 @@ cs_program_emit_local_size(struct fd_context *ctx, fd_crb &crb, enum a6xx_threadsize thrsz_cs = ctx->screen->info->props .supports_double_threadsize ? thrsz : THREAD128; - if (CHIP == A7XX) { + if (CHIP >= A7XX) { unsigned tile_height = (local_size[1] % 8 == 0) ? 3 : (local_size[1] % 4 == 0) ? 5 : (local_size[1] % 2 == 0) ? 9 diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc index ddc4dfc8a9b..0211f5385e2 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc @@ -313,8 +313,10 @@ fd6_context_create(struct pipe_screen *pscreen, void *priv, fd_crb crb(fd6_ctx->base.pipe, 3); crb.add(GRAS_SC_MSAA_SAMPLE_POS_CNTL(CHIP)) - .add(A6XX_RB_MSAA_SAMPLE_POS_CNTL()) - .add(TPL1_MSAA_SAMPLE_POS_CNTL(CHIP)); + .add(A6XX_RB_MSAA_SAMPLE_POS_CNTL()); + + if (CHIP < A8XX) + crb.add(TPL1_MSAA_SAMPLE_POS_CNTL(CHIP)); fd6_ctx->sample_locations_disable_stateobj = crb; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc index b527427f90d..00232bad861 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.cc @@ -435,6 +435,10 @@ draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info, /* convert from # of patches to draw count */ subdraw_size *= ctx->patch_vertices; + /* For gen8 tess_bo is sized for two draws, adjust subdraw size accordingly: */ + if (CHIP >= A8XX) + subdraw_size /= 2; + fd_pkt7(cs, CP_SET_SUBDRAW_SIZE, 1) .add(subdraw_size); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc index 1a254b5aca2..48d02729550 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc @@ -21,6 +21,7 @@ #include "freedreno_state.h" #include 
"freedreno_stompable_regs.h" #include "freedreno_tracepoints.h" +#include "freedreno_vrs.h" #include "fd6_blend.h" #include "fd6_const.h" @@ -464,7 +465,7 @@ fd6_emit_non_group(fd_cs &cs, struct fd6_emit *emit) assert_dt const enum fd_dirty_3d_state dirty = ctx->dirty; unsigned num_viewports = emit->prog->num_viewports; - fd_crb crb(cs, 324); + fd_crb crb(cs, 2 + (12 * num_viewports)); if (dirty & FD_DIRTY_STENCIL_REF) { struct pipe_stencil_ref *sr = &ctx->stencil_ref; @@ -506,8 +507,11 @@ fd6_emit_non_group(fd_cs &cs, struct fd6_emit *emit) assert_dt crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MIN(CHIP, i, zmin)); crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MAX(CHIP, i, zmax)); - /* TODO: what to do about this and multi viewport ? */ - if (i == 0) { + if (CHIP >= A8XX) { + crb.add(RB_VIEWPORT_ZCLAMP_MIN_REG(CHIP, i, zmin)); + crb.add(RB_VIEWPORT_ZCLAMP_MAX_REG(CHIP, i, zmax)); + } else if (i == 0) { + /* TODO: what to do about this and multi viewport ? */ crb.add(RB_VIEWPORT_ZCLAMP_MIN(CHIP, zmin)); crb.add(RB_VIEWPORT_ZCLAMP_MAX(CHIP, zmax)); } @@ -764,7 +768,24 @@ fd6_emit_gmem_cache_cntl(fd_cs &cs, struct fd_screen *screen, bool gmem) uint32_t depth_offset = cfg->depth_ccu_offset & 0x1fffff; uint32_t depth_offset_hi = cfg->depth_ccu_offset >> 21; - if (CHIP == A7XX) { + if (CHIP == A8XX) { + fd_crb(cs, 10) + .add(RB_CCU_CACHE_CNTL(CHIP, + .depth_cache_size = (enum a6xx_ccu_cache_size)cfg->depth_cache_fraction, + .depth_offset = cfg->depth_ccu_offset, + .color_cache_size = (enum a6xx_ccu_cache_size)cfg->color_cache_fraction, + .color_offset = cfg->color_ccu_offset, + )) + .add(VPC_ATTR_BUF_GMEM_SIZE(CHIP, cfg->vpc_attr_buf_size)) + .add(VPC_ATTR_BUF_GMEM_BASE(CHIP, cfg->vpc_attr_buf_offset)) + .add(PC_ATTR_BUF_GMEM_SIZE(CHIP, cfg->vpc_attr_buf_size)) + .add(VPC_POS_BUF_GMEM_SIZE(CHIP, cfg->vpc_pos_buf_size)) + .add(VPC_POS_BUF_GMEM_BASE(CHIP, cfg->vpc_pos_buf_offset)) + .add(PC_POS_BUF_GMEM_SIZE(CHIP, cfg->vpc_pos_buf_size)) + .add(VPC_BV_POS_BUF_GMEM_BASE(CHIP, 
cfg->vpc_bv_pos_buf_offset)) + .add(VPC_BV_POS_BUF_GMEM_SIZE(CHIP, cfg->vpc_bv_pos_buf_size)) + .add(PC_BV_POS_BUF_GMEM_SIZE(CHIP, cfg->vpc_bv_pos_buf_size)); + } else if (CHIP == A7XX) { fd_pkt4(cs, 1) .add(RB_CCU_CACHE_CNTL(CHIP, .depth_offset_hi = depth_offset_hi, @@ -821,7 +842,7 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs) fd_ncrb ncrb(cs, 28 + ARRAY_SIZE(screen->info->magic_raw)); - if (CHIP >= A7XX) { + if (CHIP == A7XX) { /* On A7XX, RB_CCU_CNTL was broken into two registers, RB_CCU_CNTL which has * static properties that can be set once, this requires a WFI to take effect. * While the newly introduced register RB_CCU_CACHE_CNTL has properties that may @@ -858,12 +879,19 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs) ncrb.add({ .reg = magic_reg.reg, .value = value }); } - ncrb.add(A6XX_RB_DBG_ECO_CNTL(.dword = screen->info->magic.RB_DBG_ECO_CNTL)); - ncrb.add(A6XX_SP_NC_MODE_CNTL_2(.f16_no_inf = true)); + /* gen8 moves magic reg setup to KMD and blocks access from UMD: + */ + if (CHIP < A8XX) { + ncrb.add(A6XX_RB_DBG_ECO_CNTL(.dword = screen->info->magic.RB_DBG_ECO_CNTL)); + ncrb.add(A6XX_SP_NC_MODE_CNTL_2(.f16_no_inf = true)); + ncrb.add(VPC_LB_MODE_CNTL(CHIP)); + ncrb.add(PC_CONTEXT_SWITCH_GFX_PREEMPTION_MODE(CHIP)); + } ncrb.add(A6XX_SP_PERFCTR_SHADER_MASK(.dword = 0x3f)); if (CHIP == A6XX && !screen->info->props.is_a702) ncrb.add(TPL1_UNKNOWN_B605(CHIP, .dword = 0x44)); + if (CHIP == A6XX) { ncrb.add(HLSQ_UNKNOWN_BE00(CHIP, .dword = 0x80)); ncrb.add(HLSQ_UNKNOWN_BE01(CHIP)); @@ -875,10 +903,9 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs) } ncrb.add(GRAS_SC_SCREEN_SCISSOR_CNTL(CHIP)); - ncrb.add(VPC_LB_MODE_CNTL(CHIP)); - /* These regs are blocked (CP_PROTECT) on a6xx: */ - if (CHIP >= A7XX) { + /* These regs are blocked (CP_PROTECT) on a6xx, written by KMD on a8xx: */ + if (CHIP == A7XX) { ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 0, 0)); 
ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 1, 0x3fe05ff4)); ncrb.add(TPL1_BICUBIC_WEIGHTS_TABLE_REG(CHIP, 2, 0x3fa0ebee)); @@ -891,8 +918,6 @@ fd6_emit_static_non_context_regs(struct fd_context *ctx, fd_cs &cs) ncrb.add(RB_BIN_FOVEAT(CHIP)); } - ncrb.add(PC_CONTEXT_SWITCH_GFX_PREEMPTION_MODE(CHIP)); - if (CHIP == A7XX) ncrb.add(RB_UNKNOWN_8E09(CHIP, 0x7)); } @@ -913,7 +938,11 @@ fd6_emit_static_context_regs(struct fd_context *ctx, fd_cs &cs) crb.add(SP_GFX_USIZE(CHIP)); crb.add(A6XX_TPL1_PS_ROTATION_CNTL()); - crb.add(A6XX_RB_RBP_CNTL(.dword = screen->info->magic.RB_RBP_CNTL)); + /* gen8 moves magic reg programming to KMD and blocks access for UMD: */ + if (CHIP < A8XX) { + crb.add(A6XX_RB_RBP_CNTL(.dword = screen->info->magic.RB_RBP_CNTL)); + } + crb.add(A6XX_SP_UNKNOWN_A9A8()); crb.add(A6XX_SP_MODE_CNTL( @@ -1010,6 +1039,12 @@ fd6_emit_static_context_regs(struct fd_context *ctx, fd_cs &cs) * so we play safe and don't add it. */ crb.add(PC_TF_BUFFER_SIZE(CHIP, FD6_TESS::FACTOR_SIZE)); + + if (screen->info->props.has_attachment_shading_rate) { + for (int i = 0; i < 2; i++) { + crb.add(GRAS_LRZ_QUALITY_LOOKUP_TABLE_REG(CHIP, i, fd_gras_shading_rate_lut(i))); + } + } } /* There is an optimization to skip executing draw states for draws with no @@ -1076,9 +1111,17 @@ fd6_emit_restore(fd_cs &cs, struct fd_batch *batch) .concurrent_bin_disable = true, )); + if (CHIP == A8XX) { + fd6_lrz_inv(ctx, cs); + } + fd6_event_write(ctx, cs, FD_CCU_INVALIDATE_COLOR); fd6_event_write(ctx, cs, FD_CCU_INVALIDATE_DEPTH); - fd6_event_write(ctx, cs, FD_LRZ_INVALIDATE); + if (CHIP == A8XX) { + fd_pkt7(cs, CP_CCHE_INVALIDATE, 0); + } else { + fd6_event_write(ctx, cs, FD_LRZ_INVALIDATE); + } fd6_event_write(ctx, cs, FD_CACHE_INVALIDATE); fd_pkt7(cs, CP_WAIT_FOR_IDLE, 0); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 5c055766041..14e61527903 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ 
b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -214,7 +214,7 @@ __event_write(fd_cs &cs, enum fd_gpu_event event, struct fd_gpu_event_info info = fd_gpu_events[event]; unsigned len = info.needs_seqno ? 4 : 1; - if ((CHIP == A7XX) && (event == FD_RB_DONE)) + if ((CHIP >= A7XX) && (event == FD_RB_DONE)) len--; fd_pkt7 pkt(cs, CP_EVENT_WRITE, len); @@ -222,7 +222,7 @@ __event_write(fd_cs &cs, enum fd_gpu_event event, if (CHIP == A6XX) { pkt.add(CP_EVENT_WRITE_0_EVENT(info.raw_event) | COND(info.needs_seqno, CP_EVENT_WRITE_0_TIMESTAMP)); - } else if (CHIP == A7XX) { + } else if (CHIP >= A7XX) { pkt.add(CP_EVENT_WRITE7_0_EVENT(info.raw_event) | CP_EVENT_WRITE7_0_WRITE_SRC(esrc) | CP_EVENT_WRITE7_0_WRITE_DST(edst) | @@ -281,6 +281,28 @@ fd6_cache_inv(struct fd_context *ctx, fd_cs &cs) fd6_event_write(ctx, cs, FD_CACHE_INVALIDATE); } +template +static inline void +fd6_lrz_inv(struct fd_context *ctx, fd_cs &cs) +{ + with_crb (cs, 3) { + crb.add(GRAS_LRZ_CNTL(CHIP, .enable = true)); + crb.add(GRAS_LRZ_CNTL2(CHIP, + .disable_on_wrong_dir = true, + .fc_enable = true, + )); + crb.add(RB_LRZ_CNTL2(CHIP)); + } + + fd6_event_write(ctx, cs, FD_LRZ_FLUSH); + + with_crb (cs, 3) { + crb.add(GRAS_LRZ_CNTL(CHIP)); + crb.add(GRAS_LRZ_CNTL2(CHIP)); + crb.add(RB_LRZ_CNTL2(CHIP)); + } +} + template static inline void fd6_set_rb_dbg_eco_mode(struct fd_context *ctx, fd_cs &cs, bool blit) @@ -311,8 +333,13 @@ template static inline void fd6_set_render_mode(fd_cs &cs, struct fd6_set_render_mode args) { - fd_pkt7(cs, CP_SET_MARKER, 1) - .add(A6XX_CP_SET_MARKER_0(.mode = args.mode, .uses_gmem = args.uses_gmem)); + if (CHIP >= A8XX) { + fd_pkt7(cs, CP_SET_MARKER, 1) + .add(A8XX_CP_SET_MARKER_0(.mode = args.mode, .uses_gmem = args.uses_gmem)); + } else { + fd_pkt7(cs, CP_SET_MARKER, 1) + .add(A6XX_CP_SET_MARKER_0(.mode = args.mode, .uses_gmem = args.uses_gmem)); + } } static inline bool diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc 
index 1de83691735..59e54e77b18 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc @@ -364,6 +364,7 @@ use_hw_binning(struct fd_batch *batch) (batch->num_draws > 0); } +template static void patch_fb_read_gmem(struct fd_batch *batch) { @@ -387,34 +388,70 @@ patch_fb_read_gmem(struct fd_batch *batch) uint8_t swiz[4]; fdl6_format_swiz(psurf->format, false, swiz); - uint64_t base = screen->gmem_base + gmem->cbuf_base[buf]; /* always TILE6_2 mode in GMEM, which also means no swap: */ - uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = { - A6XX_TEX_CONST_0_FMT(fd6_texture_format( - format, (enum a6xx_tile_mode)rsc->layout.tile_mode, false)) | - A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) | - A6XX_TEX_CONST_0_SWAP(WZYX) | - A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) | - COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) | - A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) | - A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) | - A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) | - A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])), + if (CHIP <= A7XX) { + uint64_t base = screen->gmem_base + gmem->cbuf_base[buf]; + uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = { + A6XX_TEX_CONST_0_FMT(fd6_texture_format( + format, (enum a6xx_tile_mode)rsc->layout.tile_mode, false)) | + A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) | + A6XX_TEX_CONST_0_SWAP(WZYX) | + A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) | + COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) | + A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) | + A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) | + A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) | + A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])), - A6XX_TEX_CONST_1_WIDTH(pfb->width) | - A6XX_TEX_CONST_1_HEIGHT(pfb->height), + A6XX_TEX_CONST_1_WIDTH(pfb->width) | + A6XX_TEX_CONST_1_HEIGHT(pfb->height), - A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[buf]) | - A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D), + 
A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[buf]) | + A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D), - A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size), - A6XX_TEX_CONST_4_BASE_LO(base), + A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size), + A6XX_TEX_CONST_4_BASE_LO(base), - A6XX_TEX_CONST_5_BASE_HI(base >> 32) | - A6XX_TEX_CONST_5_DEPTH(prsc->array_size) - }; + A6XX_TEX_CONST_5_BASE_HI(base >> 32) | + A6XX_TEX_CONST_5_DEPTH(prsc->array_size) + }; - memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4); + memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4); + } else if (CHIP >= A8XX) { + /* base address is simply GMEM location when using GMEM tiling: */ + uint64_t base = gmem->cbuf_base[buf]; + uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = { + A8XX_TEX_MEMOBJ_0_BASE_LO(base), + + A8XX_TEX_MEMOBJ_1_BASE_HI(base >> 32) | + A8XX_TEX_MEMOBJ_1_TYPE(A6XX_TEX_2D) | + A8XX_TEX_MEMOBJ_1_DEPTH(prsc->array_size), + + A8XX_TEX_MEMOBJ_2_WIDTH(pfb->width) | + A8XX_TEX_MEMOBJ_2_HEIGHT(pfb->height) | + A8XX_TEX_MEMOBJ_2_SAMPLES(fd_msaa_samples(prsc->nr_samples)), + + A8XX_TEX_MEMOBJ_3_FMT(fd6_texture_format( + format, (enum a6xx_tile_mode)rsc->layout.tile_mode, false)) | + A8XX_TEX_MEMOBJ_3_SWAP(WZYX) | + A8XX_TEX_MEMOBJ_3_SWIZ_X(fdl8_swiz(swiz[0])) | + A8XX_TEX_MEMOBJ_3_SWIZ_Y(fdl8_swiz(swiz[1])) | + A8XX_TEX_MEMOBJ_3_SWIZ_Z(fdl8_swiz(swiz[2])) | + A8XX_TEX_MEMOBJ_3_SWIZ_W(fdl8_swiz(swiz[3])), + + A8XX_TEX_MEMOBJ_4_TILE_MODE(TILE6_2) | + COND(util_format_is_srgb(format), A8XX_TEX_MEMOBJ_4_SRGB), + + 0, + + A8XX_TEX_MEMOBJ_6_TEX_LINE_OFFSET(gmem->bin_w * gmem->cbuf_cpp[buf] * 8) | /* in bits */ + A8XX_TEX_MEMOBJ_6_MIN_LINE_OFFSET(0), + + A8XX_TEX_MEMOBJ_7_ARRAY_SLICE_OFFSET(rsc->layout.layer_size), + }; + + memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4); + } } util_dynarray_clear(&batch->fb_read_patches); @@ -897,7 +934,7 @@ struct bin_size_params { enum a6xx_lrz_feedback_mask lrz_feedback_zmode_mask; }; -/* nregs: 4 */ +/* nregs: 17 */ template static void 
set_bin_size(fd_crb &crb, const struct fd_gmem_stateobj *gmem, struct bin_size_params p) @@ -905,22 +942,14 @@ set_bin_size(fd_crb &crb, const struct fd_gmem_stateobj *gmem, struct bin_size_p unsigned w = gmem ? gmem->bin_w : 0; unsigned h = gmem ? gmem->bin_h : 0; - if (CHIP == A6XX) { - crb.add(GRAS_SC_BIN_CNTL(CHIP, - .binw = w, .binh = h, - .render_mode = p.render_mode, - .force_lrz_write_dis = p.force_lrz_write_dis, - .buffers_location = p.buffers_location, - .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask, - )); - } else { - crb.add(GRAS_SC_BIN_CNTL(CHIP, - .binw = w, .binh = h, - .render_mode = p.render_mode, - .force_lrz_write_dis = p.force_lrz_write_dis, - .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask, - )); - } + crb.add(GRAS_SC_BIN_CNTL(CHIP, + .binw = w, .binh = h, + .render_mode = p.render_mode, + .force_lrz_write_dis = p.force_lrz_write_dis, + .buffers_location = p.buffers_location, + .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask, + .force_bi_dir_lrz_disable = true, + )); crb.add(RB_CNTL( CHIP, .binw = w, .binh = h, @@ -930,6 +959,28 @@ set_bin_size(fd_crb &crb, const struct fd_gmem_stateobj *gmem, struct bin_size_p .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask, )); crb.add(A6XX_VFD_RENDER_MODE(p.render_mode)); + + if (CHIP >= A8XX) { + crb.add(TPL1_BIN_SIZE(CHIP, .binw = w, .binh = h)); + crb.add(TPL1_A2D_BIN_SIZE(CHIP, .binw = w, .binh = h)); + crb.add(SP_BIN_SIZE(CHIP, .binw = w, .binh = h)); + + for (int i = 0; i < 8; i++) { + crb.add(RB_MRT_GMEM_DIMENSION_REG(CHIP, i, + .width = COND(gmem && gmem->cbuf_cpp[i], w), + .height = COND(gmem && gmem->cbuf_cpp[i], h), + )); + } + crb.add(RB_DEPTH_GMEM_DIMENSION(CHIP, + .width = COND(gmem && gmem->zsbuf_cpp[0], w), + .height = COND(gmem && gmem->zsbuf_cpp[0], h), + )); + crb.add(RB_STENCIL_GMEM_DIMENSION(CHIP, + .width = COND(gmem && (gmem->zsbuf_cpp[0] || gmem->zsbuf_cpp[1]), w), + .height = COND(gmem && (gmem->zsbuf_cpp[0] || gmem->zsbuf_cpp[1]), h), + )); + } + /* no flag for 
RB_RESOLVE_CNTL_3... */ crb.add(RB_RESOLVE_CNTL_3(CHIP, .binw = w, .binh = h)); } @@ -1044,6 +1095,7 @@ template static void fd7_emit_static_binning_regs(fd_cs &cs, bool gmem) { + bool full_in_gmem = false; /* gen8 TODO */ bool sysmem = !gmem; fd_ncrb ncrb(cs, 4); @@ -1060,6 +1112,16 @@ fd7_emit_static_binning_regs(fd_cs &cs, bool gmem) .rt5_sysmem = sysmem, .rt6_sysmem = sysmem, .rt7_sysmem = sysmem, + .z_full_in_gmem = full_in_gmem, + .s_full_in_gmem = full_in_gmem, + .rt0_full_in_gmem = full_in_gmem, + .rt1_full_in_gmem = full_in_gmem, + .rt2_full_in_gmem = full_in_gmem, + .rt3_full_in_gmem = full_in_gmem, + .rt4_full_in_gmem = full_in_gmem, + .rt5_full_in_gmem = full_in_gmem, + .rt6_full_in_gmem = full_in_gmem, + .rt7_full_in_gmem = full_in_gmem, )); ncrb.add(GRAS_LRZ_CB_CNTL(CHIP, 0x0)); ncrb.add(GRAS_MODE_CNTL(CHIP, 0x2)); @@ -1163,7 +1225,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt emit_msaa(crb, pfb->samples); } - patch_fb_read_gmem(batch); + patch_fb_read_gmem(batch); if (CHIP >= A7XX) { fd7_emit_static_binning_regs(cs, true); @@ -1171,7 +1233,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt if (use_hw_binning(batch)) { /* enable stream-out during binning pass: */ - with_crb (cs, 5) { + with_crb (cs, 18) { crb.add(VPC_SO_OVERRIDE(CHIP, false)); set_bin_size(crb, gmem, { @@ -1184,7 +1246,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt update_render_cntl(cs, screen, pfb, true); emit_binning_pass(cs, batch); - with_crb (cs, 5) { + with_crb (cs, 18) { /* and disable stream-out for draw pass: */ crb.add(VPC_SO_OVERRIDE(CHIP, true)); @@ -1219,7 +1281,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt control_ptr(fd6_context(batch->ctx), vsc_state) )); } else { - with_crb (cs, 5) { + with_crb (cs, 18) { /* no binning pass, so enable stream-out for draw pass: */ crb.add(VPC_SO_OVERRIDE(CHIP, false)); @@ -1240,7 +1302,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt emit_common_init(cs, batch); } -/* nregs: 4 */ +/* 
nregs: 5 */ template static void set_window_offset(fd_crb &crb, uint32_t x1, uint32_t y1) @@ -1249,6 +1311,8 @@ set_window_offset(fd_crb &crb, uint32_t x1, uint32_t y1) crb.add(A6XX_RB_RESOLVE_WINDOW_OFFSET(.x = x1, .y = y1)); crb.add(SP_WINDOW_OFFSET(CHIP, .x = x1, .y = y1)); crb.add(A6XX_TPL1_WINDOW_OFFSET(.x = x1, .y = y1)); + if (CHIP >= A8XX) + crb.add(TPL1_A2D_WINDOW_OFFSET(CHIP, .x = x1, .y = y1)); } /* before mem2gmem */ @@ -1310,7 +1374,7 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) fd_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1) .add(0x0); - with_crb (cs, 5) { + with_crb (cs, 18) { crb.add(VPC_SO_OVERRIDE(CHIP, true)); /* @@ -1340,7 +1404,7 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) .add(VPC_SO_OVERRIDE(CHIP, false)); } - with_crb (cs, 8) { + with_crb (cs, 22) { set_window_offset(crb, x1, y1); set_bin_size(crb, gmem, { @@ -1366,8 +1430,8 @@ set_blit_scissor(struct fd_batch *batch, fd_cs &cs) blit_scissor.minx = 0; blit_scissor.miny = 0; - blit_scissor.maxx = align(pfb->width, 16); - blit_scissor.maxy = align(pfb->height, 4); + blit_scissor.maxx = MAX2(1, align(pfb->width, 16)); + blit_scissor.maxy = MAX2(1, align(pfb->height, 4)); fd_pkt4(cs, 2) .add(A6XX_RB_RESOLVE_CNTL_1(.x = blit_scissor.minx, .y = blit_scissor.miny)) @@ -1947,8 +2011,7 @@ emit_sysmem_clears(fd_cs &cs, struct fd_batch *batch, struct fd_batch_subpass *s uint32_t buffers = subpass->fast_cleared; - if (!buffers) - return; + assert(buffers); struct pipe_box box2d; u_box_2d(0, 0, pfb->width, pfb->height, &box2d); @@ -2037,6 +2100,7 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt set_scissor(cs, 0, 0, pfb->width - 1, pfb->height - 1); else set_scissor(cs, 0, 0, 0, 0); + set_blit_scissor(batch, cs); set_tessfactor_bo(cs, batch); @@ -2046,12 +2110,15 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt .add(GRAS_MODE_CNTL(CHIP, 0x2)); } - with_crb (cs, 11) { + with_crb (cs, 25) { set_window_offset(crb, 0, 0); 
set_bin_size(crb, NULL, { .render_mode = RENDERING_PASS, .buffers_location = BUFFERS_IN_SYSMEM, + .lrz_feedback_zmode_mask = batch->ctx->screen->info->props.has_lrz_feedback + ? LRZ_FEEDBACK_EARLY_Z_LATE_Z + : LRZ_FEEDBACK_NONE, }); if (CHIP >= A7XX) { @@ -2113,7 +2180,7 @@ fd6_emit_sysmem(struct fd_batch *batch) foreach_subpass (subpass, batch) { if (subpass->fast_cleared) { - unsigned flushes = 0; + unsigned flushes = FD6_INVALIDATE_CCHE | FD6_WAIT_FOR_IDLE; if (subpass->fast_cleared & FD_BUFFER_COLOR) flushes |= FD6_INVALIDATE_CCU_COLOR; if (subpass->fast_cleared & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) @@ -2121,6 +2188,7 @@ fd6_emit_sysmem(struct fd_batch *batch) fd6_emit_flushes(batch->ctx, cs, flushes); emit_sysmem_clears(cs, batch, subpass); + fd6_emit_flushes(batch->ctx, cs, flushes); fd6_set_render_mode(cs, {RM6_DIRECT_RENDER}); } @@ -2154,7 +2222,11 @@ fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt fd6_emit_flushes(batch->ctx, cs, FD6_FLUSH_CCU_COLOR | - FD6_FLUSH_CCU_DEPTH); + FD6_INVALIDATE_CCU_COLOR | + FD6_FLUSH_CCU_DEPTH | + FD6_INVALIDATE_CCU_DEPTH | + FD6_INVALIDATE_CCHE | + FD6_WAIT_FOR_IDLE); } template diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.cc b/src/gallium/drivers/freedreno/a6xx/fd6_image.cc index 8f102915ca7..eee9f113d09 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.cc @@ -283,7 +283,7 @@ fd6_build_bindless_state(struct fd_context *ctx, mesa_shader_stage shader, } } - if (bufso->enabled_mask) { + if ((CHIP < A8XX) && bufso->enabled_mask) { fd_pkt7(cs, CP_LOAD_STATE6_FRAG, 3) .add(CP_LOAD_STATE6_0( .dst_off = IR3_BINDLESS_SSBO_OFFSET, @@ -299,7 +299,7 @@ fd6_build_bindless_state(struct fd_context *ctx, mesa_shader_stage shader, )); } - if (imgso->enabled_mask) { + if ((CHIP < A8XX) && imgso->enabled_mask) { fd_pkt7(cs, CP_LOAD_STATE6_FRAG, 3) .add(CP_LOAD_STATE6_0( .dst_off = IR3_BINDLESS_IMAGE_OFFSET, @@ -329,7 +329,7 @@ fd6_build_bindless_state(struct 
fd_context *ctx, mesa_shader_stage shader, } } - if (bufso->enabled_mask) { + if ((CHIP < A8XX) && bufso->enabled_mask) { fd_pkt7(cs, CP_LOAD_STATE6, 3) .add(CP_LOAD_STATE6_0( .dst_off = IR3_BINDLESS_SSBO_OFFSET, @@ -345,7 +345,7 @@ fd6_build_bindless_state(struct fd_context *ctx, mesa_shader_stage shader, )); } - if (imgso->enabled_mask) { + if ((CHIP < A8XX) && imgso->enabled_mask) { fd_pkt7(cs, CP_LOAD_STATE6, 3) .add(CP_LOAD_STATE6_0( .dst_off = IR3_BINDLESS_IMAGE_OFFSET, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc index ca0d88324fb..35886125033 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc @@ -47,7 +47,7 @@ template static void emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_variant *so) { - fd_crb crb(cs, 11); + fd_crb crb(cs, 12); mesa_shader_stage type = so->type; if (type == MESA_SHADER_KERNEL) @@ -171,6 +171,12 @@ emit_shader_regs(struct fd_context *ctx, fd_cs &cs, const struct ir3_shader_vari .earlypreamble = so->early_preamble, .mergedregs = so->mergedregs, )); + if (CHIP >= A8XX) { + crb.add(RB_PS_CNTL(CHIP, + .pixlodenable = so->need_pixlod, + .lodpixmask = so->need_full_quad, + )); + } crb.add(A6XX_SP_PS_INSTR_SIZE(so->instrlen)); crb.add(A6XX_SP_PS_PROGRAM_COUNTER_OFFSET()); crb.add(A6XX_SP_PS_BASE(so->bo)); @@ -464,6 +470,7 @@ primitive_to_tess(enum mesa_prim primitive) #define MAX_VERTEX_ATTRIBS 32 +template static void emit_vfd_dest(fd_crb &crb, const struct ir3_shader_variant *vs) { @@ -478,6 +485,26 @@ emit_vfd_dest(fd_crb &crb, const struct ir3_shader_variant *vs) .decode_cnt = attr_count )); + if (CHIP >= A8XX) { + const uint32_t vertexid_regid = + ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID); + const uint32_t instanceid_regid = + ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID); + /* Note: we currently don't support multiview. 
+ */ + const uint32_t viewid_regid = INVALID_REG; + + unsigned sideband_count = + (vertexid_regid != INVALID_REG) + + (instanceid_regid != INVALID_REG) + + (viewid_regid != INVALID_REG); + + crb.add(PC_VS_INPUT_CNTL(CHIP, + .instr_cnt = attr_count, + .sideband_cnt = sideband_count, + )); + } + for (uint32_t i = 0; i < attr_count; i++) { assert(!vs->inputs[i].sysval); crb.add(A6XX_VFD_DEST_CNTL_INSTR(i, @@ -1046,7 +1073,7 @@ emit_fs_inputs(fd_crb &crb, const struct program_builder *b) )); } - if (CHIP == A7XX) { + if (CHIP >= A7XX) { for (int i = 0; i < fs->num_sampler_prefetch; i++) { const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i]; crb.add(A6XX_SP_PS_INITIAL_TEX_INDEX_CMD(i, @@ -1057,6 +1084,10 @@ emit_fs_inputs(fd_crb &crb, const struct program_builder *b) } crb.add(SP_LB_PARAM_LIMIT(CHIP, b->ctx->screen->info->props.prim_alloc_threshold)); + + if (CHIP == A8XX) + crb.add(RB_LB_PARAM_LIMIT(CHIP, b->ctx->screen->info->props.prim_alloc_threshold)); + crb.add(SP_REG_PROG_ID_0(CHIP, .faceregid = face_regid, .sampleid = samp_id_regid, @@ -1246,7 +1277,7 @@ setup_stateobj(fd_cs &cs, const struct program_builder *b) crb.add(PC_STEREO_RENDERING_CNTL(CHIP)); - emit_vfd_dest(crb, b->vs); + emit_vfd_dest(crb, b->vs); emit_vpc(crb, b); emit_fs_inputs(crb, b); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.h b/src/gallium/drivers/freedreno/a6xx/fd6_program.h index bc5ba14a943..3d1e0b4b673 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.h @@ -92,6 +92,10 @@ template static inline bool fd6_load_shader_consts_via_preamble(const struct ir3_shader_variant *v) { + if (CHIP == A8XX) { + assert(v->compiler->load_shader_consts_via_preamble); + return true; + } return (CHIP == A7XX) && v->compiler->load_shader_consts_via_preamble; } @@ -99,6 +103,10 @@ template static inline bool fd6_load_inline_uniforms_via_preamble_ldgk(const struct ir3_shader_variant *v) { + if (CHIP == A8XX) { + 
assert(v->compiler->load_inline_uniforms_via_preamble_ldgk); + return true; + } return (CHIP == A7XX) && v->compiler->load_inline_uniforms_via_preamble_ldgk; } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc index 6c5644871ad..d689d02415d 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_rasterizer.cc @@ -36,7 +36,10 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, psize_max = cso->point_size; } - unsigned nreg = (CHIP >= A7XX) ? 46 : 15; + unsigned nreg = (CHIP >= A7XX) ? 76 : 15; + if (CHIP >= A8XX) + nreg++; + fd_crb crb(ctx->pipe, nreg); crb.add(GRAS_CL_CNTL(CHIP, @@ -94,7 +97,10 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, crb.add(VPC_RAST_CNTL(CHIP, mode)); crb.add(PC_DGEN_RAST_CNTL(CHIP, mode)); - if (CHIP == A7XX || + if (CHIP >= A8XX) + crb.add(GRAS_RAST_CNTL(CHIP, mode)); + + if (CHIP >= A7XX || (CHIP == A6XX && ctx->screen->info->props.is_a702)) { crb.add(VPC_PS_RAST_CNTL(CHIP, mode)); } @@ -113,10 +119,17 @@ __fd6_setup_rasterizer_stateobj(struct fd_context *ctx, for (unsigned i = 0; i < num_viewports; i++) { crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MIN(CHIP, i, 0.0f)); crb.add(GRAS_CL_VIEWPORT_ZCLAMP_MAX(CHIP, i, 1.0f)); + + if (CHIP >= A8XX) { + crb.add(RB_VIEWPORT_ZCLAMP_MIN_REG(CHIP, i, 0.0f)); + crb.add(RB_VIEWPORT_ZCLAMP_MAX_REG(CHIP, i, 1.0f)); + } } - crb.add(RB_VIEWPORT_ZCLAMP_MIN(CHIP, 0.0f)); - crb.add(RB_VIEWPORT_ZCLAMP_MAX(CHIP, 1.0f)); + if (CHIP <= A7XX) { + crb.add(RB_VIEWPORT_ZCLAMP_MIN(CHIP, 0.0f)); + crb.add(RB_VIEWPORT_ZCLAMP_MAX(CHIP, 1.0f)); + } } if (CHIP == A6XX && ctx->screen->info->props.has_legacy_pipeline_shading_rate) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc b/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc index 6a7554bc3a1..2d2926b26a4 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.cc @@ 
-162,10 +162,40 @@ static const enum pc_di_primtype primtypes[] = { }; /* clang-format on */ +static unsigned +calc_gmem_cache_offsets(struct fd_screen *screen, struct fd6_gmem_config *config) +{ + unsigned num_ccu = screen->info->num_ccu; + + /* Layout from end of gmem: */ + unsigned offset = screen->gmemsize_bytes; + + // NOTE(review): skips 0x78000 bytes at the end of GMEM before placing any buffer; the reason for this size is not documented here -- confirm and name the constant + offset -= 0x78000; + + config->vpc_bv_pos_buf_offset = offset - (num_ccu * config->vpc_bv_pos_buf_size); + offset = config->vpc_bv_pos_buf_offset; + + config->vpc_attr_buf_offset = offset - (num_ccu * config->vpc_attr_buf_size); + offset = config->vpc_attr_buf_offset; + + config->vpc_pos_buf_offset = offset - (num_ccu * config->vpc_pos_buf_size); + offset = config->vpc_pos_buf_offset; + + config->color_ccu_offset = offset - (num_ccu * config->color_cache_size); + offset = config->color_ccu_offset; + + config->depth_ccu_offset = offset - (num_ccu * config->depth_cache_size); + offset = config->depth_ccu_offset; + + return offset; +} + void fd6_screen_init(struct pipe_screen *pscreen) { struct fd_screen *screen = fd_screen(pscreen); + const struct fd_dev_info *info = screen->info; screen->max_rts = A6XX_MAX_RENDER_TARGETS; @@ -183,7 +213,27 @@ fd6_screen_init(struct pipe_screen *pscreen) sysmem->depth_ccu_offset = 0; sysmem->color_ccu_offset = sysmem->depth_ccu_offset + depth_cache_size; - if (screen->info->props.has_gmem_vpc_attr_buf) { + /* TODO we could unify gen7/gen8 setup.. gen7 is a subset.. 
*/ + if (info->chip == 8) { + gmem->depth_cache_fraction = info->props.gmem_ccu_depth_cache_fraction; + gmem->depth_cache_size = info->props.gmem_per_ccu_depth_cache_size; + gmem->color_cache_fraction = info->props.gmem_ccu_color_cache_fraction; + gmem->color_cache_size = info->props.gmem_per_ccu_color_cache_size; + gmem->vpc_attr_buf_size = info->props.gmem_vpc_attr_buf_size; + gmem->vpc_pos_buf_size = info->props.gmem_vpc_pos_buf_size; + gmem->vpc_bv_pos_buf_size = info->props.gmem_vpc_bv_pos_buf_size; + + sysmem->depth_cache_fraction = info->props.sysmem_ccu_depth_cache_fraction; + sysmem->depth_cache_size = info->props.sysmem_per_ccu_depth_cache_size; + sysmem->color_cache_fraction = info->props.sysmem_ccu_color_cache_fraction; + sysmem->color_cache_size = info->props.sysmem_per_ccu_color_cache_size; + sysmem->vpc_attr_buf_size = info->props.sysmem_vpc_attr_buf_size; + sysmem->vpc_pos_buf_size = info->props.sysmem_vpc_pos_buf_size; + sysmem->vpc_bv_pos_buf_size = info->props.sysmem_vpc_bv_pos_buf_size; + + calc_gmem_cache_offsets(screen, sysmem); + screen->gmemsize_bytes = calc_gmem_cache_offsets(screen, gmem); + } else if (screen->info->props.has_gmem_vpc_attr_buf) { sysmem->vpc_attr_buf_size = screen->info->props.sysmem_vpc_attr_buf_size; sysmem->vpc_attr_buf_offset = sysmem->color_ccu_offset + color_cache_size; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.cc b/src/gallium/drivers/freedreno/a6xx/fd6_texture.cc index f45be5a0919..e0172e89a85 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.cc @@ -250,6 +250,7 @@ get_bcolor_offset(struct fd_context *ctx, const struct pipe_sampler_state *sampl return idx; } +template <chip CHIP> static void * fd6_sampler_state_create(struct pipe_context *pctx, const struct pipe_sampler_state *cso) @@ -276,28 +277,67 @@ fd6_sampler_state_create(struct pipe_context *pctx, cso->min_img_filter == PIPE_TEX_FILTER_LINEAR; bool needs_border = false; - 
so->descriptor[0] = - COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) | - A6XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | - A6XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | - A6XX_TEX_SAMP_0_ANISO((enum a6xx_tex_aniso)aniso) | - A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &needs_border)) | - A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &needs_border)) | - A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &needs_border)) | - A6XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); - so->descriptor[1] = - COND(cso->compare_mode, A6XX_TEX_SAMP_1_COMPARE_FUNC((enum adreno_compare_func)cso->compare_func)) | - COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE, - A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) | - COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | - COND(cso->unnormalized_coords, A6XX_TEX_SAMP_1_UNNORM_COORDS) | - A6XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | - A6XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); + if (CHIP <= A7XX) { + so->descriptor[0] = + COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) | + A6XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | + A6XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | + A6XX_TEX_SAMP_0_ANISO((enum a6xx_tex_aniso)aniso) | + A6XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &needs_border)) | + A6XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &needs_border)) | + A6XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &needs_border)) | + A6XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); - so->descriptor[2] = - A6XX_TEX_SAMP_2_REDUCTION_MODE(reduction_mode(cso->reduction_mode)) | - COND(chroma_linear, A6XX_TEX_SAMP_2_CHROMA_LINEAR); + so->descriptor[1] = + COND(cso->compare_mode, A6XX_TEX_SAMP_1_COMPARE_FUNC((enum adreno_compare_func)cso->compare_func)) | + COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE, + A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) | + COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | + COND(cso->unnormalized_coords, A6XX_TEX_SAMP_1_UNNORM_COORDS) | + 
A6XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | + A6XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); + + so->descriptor[2] = + A6XX_TEX_SAMP_2_REDUCTION_MODE(reduction_mode(cso->reduction_mode)) | + COND(chroma_linear, A6XX_TEX_SAMP_2_CHROMA_LINEAR); + + } else if (CHIP >= A8XX) { + const float MAX_LOD = 4095.0f / 256.0f; + + so->descriptor[0] = + COND(miplinear, A8XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) | + COND(cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE, + A8XX_TEX_SAMP_0_MIPMAPING_DIS) | + A8XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | + A8XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | + A8XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, &needs_border)) | + A8XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, &needs_border)) | + A8XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, &needs_border)) | + A8XX_TEX_SAMP_0_LOD_BIAS(CLAMP(cso->lod_bias, -16, MAX_LOD)) | + A8XX_TEX_SAMP_0_ANISO((enum a6xx_tex_aniso)aniso); + + float min_lod, max_lod; + + if (cso->unnormalized_coords) { + min_lod = max_lod = 0; + } else if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + min_lod = CLAMP(cso->min_lod, 0.0f, MAX_LOD); + max_lod = CLAMP(cso->max_lod, 0.0f, MAX_LOD); + } else { + min_lod = CLAMP(cso->min_lod, 0.0f, 0.25f); + max_lod = CLAMP(cso->max_lod, 0.0f, 0.25f); + } + + so->descriptor[1] = + A8XX_TEX_SAMP_1_MAX_LOD(max_lod) | + A8XX_TEX_SAMP_1_MIN_LOD(min_lod) | + A8XX_TEX_SAMP_1_REDUCTION_MODE(reduction_mode(cso->reduction_mode)) | + COND(cso->compare_mode, A8XX_TEX_SAMP_1_COMPARE_FUNC((enum adreno_compare_func)cso->compare_func)) | + COND(chroma_linear, A8XX_TEX_SAMP_1_CHROMA_LINEAR) | + COND(!cso->seamless_cube_map, A8XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | + COND(cso->unnormalized_coords, A8XX_TEX_SAMP_1_UNNORM_COORDS); + } if (needs_border) { bool fast_border_color_enable = false; @@ -328,11 +368,20 @@ fd6_sampler_state_create(struct pipe_context *pctx, fast_border_color = A6XX_BORDER_COLOR_1_1_1_1; } - if (fast_border_color_enable) { - so->descriptor[2] |= 
A6XX_TEX_SAMP_2_FASTBORDERCOLOR(fast_border_color) | - A6XX_TEX_SAMP_2_FASTBORDERCOLOREN; - } else { - so->descriptor[2] |= A6XX_TEX_SAMP_2_BCOLOR(get_bcolor_offset(ctx, cso)); + if (CHIP <= A7XX) { + if (fast_border_color_enable) { + so->descriptor[2] |= A6XX_TEX_SAMP_2_FASTBORDERCOLOR(fast_border_color) | + A6XX_TEX_SAMP_2_FASTBORDERCOLOREN; + } else { + so->descriptor[2] |= A6XX_TEX_SAMP_2_BCOLOR(get_bcolor_offset(ctx, cso)); + } + } else if (CHIP >= A8XX) { + if (fast_border_color_enable) { + so->descriptor[2] |= A8XX_TEX_SAMP_2_FASTBORDERCOLOR(fast_border_color) | + A8XX_TEX_SAMP_2_FASTBORDERCOLOREN; + } else { + so->descriptor[2] |= A8XX_TEX_SAMP_2_BCOLOR(get_bcolor_offset(ctx, cso)); + } } } @@ -877,7 +926,7 @@ fd6_texture_init(struct pipe_context *pctx) disable_thread_safety_analysis struct fd_context *ctx = fd_context(pctx); struct fd6_context *fd6_ctx = fd6_context(ctx); - pctx->create_sampler_state = fd6_sampler_state_create; + pctx->create_sampler_state = fd6_sampler_state_create<CHIP>; pctx->delete_sampler_state = fd6_sampler_state_delete; pctx->bind_sampler_states = fd_sampler_states_bind; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.cc b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.cc index 7cd8424bece..1ec97d28c22 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.cc @@ -187,7 +187,7 @@ fd6_zsa_state_create(struct pipe_context *pctx, bool depth_clamp_enable = (i & FD6_ZSA_DEPTH_CLAMP); bool no_alpha = (i & FD6_ZSA_NO_ALPHA); - fd_crb crb(ctx->pipe, 9); + fd_crb crb(ctx->pipe, 10); crb.add(A6XX_RB_ALPHA_TEST_CNTL( .alpha_ref = (uint32_t)(cso->alpha_ref_value * 255.0f) & 0xff, @@ -195,6 +195,12 @@ fd6_zsa_state_create(struct pipe_context *pctx, .alpha_test_func = (enum adreno_compare_func)cso->alpha_func, )); + if (CHIP >= A8XX) { + crb.add(SP_ALPHA_TEST_CNTL(CHIP, + .alpha_test = cso->alpha_enabled && !no_alpha, + )); + } + crb.add(A6XX_RB_STENCIL_CNTL( .stencil_enable = fs->enabled, 
.stencil_enable_bf = bs->enabled, @@ -220,6 +226,7 @@ fd6_zsa_state_create(struct pipe_context *pctx, .z_clamp_enable = depth_clamp_enable || CHIP >= A7XX, .z_read_enable = cso->depth_enabled || cso->depth_bounds_test, .z_bounds_enable = cso->depth_bounds_test, + .o_depth_01_clamp_en = CHIP >= A8XX, )); crb.add(GRAS_SU_DEPTH_CNTL(CHIP, cso->depth_enabled)); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 645dd7f7c65..edd5404a237 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -995,6 +995,11 @@ fd_screen_create(int fd, screen->dev_info = info; screen->info = &screen->dev_info; + if (screen->gen == 8) { + /* gen8 TODO */ + fd_mesa_debug |= FD_DBG_NOLRZ; + } + switch (screen->gen) { case 2: fd2_screen_init(pscreen); @@ -1010,6 +1015,7 @@ fd_screen_create(int fd, break; case 6: case 7: + case 8: fd6_screen_init(pscreen); break; default: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 4596251a0c5..05d66ebf29c 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -60,6 +60,18 @@ struct fd6_gmem_config { /* Vertex attrib cache (a750+): */ uint32_t vpc_attr_buf_size; uint32_t vpc_attr_buf_offset; + + /* Vertex position cache (a8xx+): */ + uint32_t vpc_pos_buf_size; + uint32_t vpc_pos_buf_offset; + uint32_t vpc_bv_pos_buf_size; + uint32_t vpc_bv_pos_buf_offset; + + /* see enum a6xx_ccu_cache_size */ + uint32_t depth_cache_fraction; + uint32_t color_cache_fraction; + uint32_t depth_cache_size; + uint32_t color_cache_size; }; struct fd_screen {