From 3d05d86d88eb83b5c4846ea80d8b23a4271d7803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 31 May 2024 22:36:03 -0400 Subject: [PATCH] radeonsi/gfx12: add DCC Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_descriptors.c | 3 +- src/gallium/drivers/radeonsi/si_pipe.h | 3 ++ .../drivers/radeonsi/si_sdma_copy_image.c | 41 +++++++++++++------ src/gallium/drivers/radeonsi/si_texture.c | 26 ++++++++++++ src/gallium/include/winsys/radeon_winsys.h | 9 ++++ src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 5 +++ 6 files changed, 73 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 9cd4f427e43..d1018554712 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -294,7 +294,8 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture }, .is_stencil = is_stencil, .dcc_enabled = - !(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level), + !(access & SI_IMAGE_ACCESS_DCC_OFF) && + (tex->buffer.flags & RADEON_FLAG_GFX12_ALLOW_DCC || vi_dcc_enabled(tex, first_level)), .tc_compat_htile_enabled = sscreen->info.gfx_level < GFX12 && vi_tc_compat_htile_enabled(tex, first_level, is_stencil ? PIPE_MASK_S : PIPE_MASK_Z), diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 98fba22d7a9..185e7de07a2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1792,6 +1792,9 @@ si_shader_selector_reference(struct si_context *sctx, /* sctx can optionally be static inline bool vi_dcc_enabled(struct si_texture *tex, unsigned level) { + /* Gfx12 always returns false because DCC is transparent to the driver. + * I think DCC doesn't have to be disabled if a color buffer is simultaneously bound as a sampler. 
+ */ return !tex->is_depth && tex->surface.meta_offset && level < tex->surface.num_meta_levels; } diff --git a/src/gallium/drivers/radeonsi/si_sdma_copy_image.c b/src/gallium/drivers/radeonsi/si_sdma_copy_image.c index a2c1acd74cc..dcda145defc 100644 --- a/src/gallium/drivers/radeonsi/si_sdma_copy_image.c +++ b/src/gallium/drivers/radeonsi/si_sdma_copy_image.c @@ -113,9 +113,15 @@ static bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_textur uint64_t linear_address = linear == ssrc ? src_address : dst_address; struct radeon_cmdbuf *cs = sctx->sdma_cs; assert(tiled->buffer.b.b.depth0 == 1); - bool dcc = false; + bool dcc; if (is_v7) { + /* Compress only when dst has DCC. If src has DCC, it automatically decompresses according + * to PTE.D (page table bit) even if we don't enable DCC in the packet. + */ + dcc = tiled == sdst && + tiled->buffer.flags & RADEON_FLAG_GFX12_ALLOW_DCC; + /* Check if everything fits into the bitfields */ if (!(tiled_width <= (1 << 16) && tiled_height <= (1 << 16) && linear_pitch <= (1 << 16) && linear_slice_pitch <= (1ull << 32) && @@ -160,20 +166,29 @@ static bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_textur radeon_emit(0); if (dcc) { - unsigned hw_fmt = ac_get_cb_format(sctx->gfx_level, tiled->buffer.b.b.format); - unsigned hw_type = ac_get_cb_number_type(tiled->buffer.b.b.format); + unsigned data_format = ac_get_cb_format(sctx->gfx_level, tiled->buffer.b.b.format); + unsigned number_type = ac_get_cb_number_type(tiled->buffer.b.b.format); uint64_t md_address = tiled_address + tiled->surface.meta_offset; - /* Add metadata */ - radeon_emit((uint32_t)md_address); - radeon_emit((uint32_t)(md_address >> 32)); - radeon_emit(hw_fmt | - ac_alpha_is_on_msb(&sctx->screen->info, tiled->buffer.b.b.format) << 8 | - hw_type << 9 | - tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24 | - V_028C78_MAX_BLOCK_SIZE_256B << 26 | - tmz << 29 | - tiled->surface.u.gfx9.color.dcc.pipe_aligned << 31); + if 
(is_v7) { + radeon_emit(data_format | + number_type << 9 | + (2 << 16) | /* 0: bypass DCC, 2: decompress reads if PTE.D */ + (1 << 18) | /* 0: bypass DCC, 1: write compressed if PTE.D, 2: write uncompressed if PTE.D */ + (tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24) | + (1 << 26)); /* max uncompressed block size: 256B */ + } else { + /* Add metadata */ + radeon_emit((uint32_t)md_address); + radeon_emit((uint32_t)(md_address >> 32)); + radeon_emit(data_format | + ac_alpha_is_on_msb(&sctx->screen->info, tiled->buffer.b.b.format) << 8 | + number_type << 9 | + tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24 | + V_028C78_MAX_BLOCK_SIZE_256B << 26 | + tmz << 29 | + tiled->surface.u.gfx9.color.dcc.pipe_aligned << 31); + } } radeon_end(); return true; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 729cd7eb926..d43b42b2684 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -225,6 +225,24 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac ptex->flags & PIPE_RESOURCE_FLAG_SPARSE) flags |= RADEON_SURF_NO_HTILE; } + + /* TODO: Set these for scanout after display DCC is enabled. The reason these are not set is + * because they overlap DCC_OFFSET_256B and the kernel driver incorrectly reads DCC_OFFSET_256B + * on GFX12, which completely breaks the display code. + */ + if (!is_imported && !(ptex->bind & PIPE_BIND_SCANOUT)) { + enum pipe_format format = util_format_get_depth_only(ptex->format); + + /* These should be set for both color and Z/S. 
*/ + surface->u.gfx9.color.dcc_number_type = ac_get_cb_number_type(format); + surface->u.gfx9.color.dcc_data_format = ac_get_cb_format(sscreen->info.gfx_level, format); + } + + if (surface->modifier == DRM_FORMAT_MOD_INVALID && + (ptex->bind & PIPE_BIND_CONST_BW || + sscreen->debug_flags & DBG(NO_DCC) || + (ptex->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_DCC)))) + flags |= RADEON_SURF_DISABLE_DCC; } else { /* Gfx6-11 */ if (!is_flushed_depth && is_depth) { @@ -1028,6 +1046,14 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, /* Create the backing buffer. */ si_init_resource_fields(sscreen, resource, alloc_size, alignment); + /* GFX12: Image descriptors always set COMPRESSION_EN=1, so this is the only thing that + * disables DCC in the driver. + */ + if (sscreen->info.gfx_level >= GFX12 && + resource->domains & RADEON_DOMAIN_VRAM && + surface->u.gfx9.gfx12_enable_dcc) + resource->flags |= RADEON_FLAG_GFX12_ALLOW_DCC; + if (!si_alloc_resource(sscreen, resource)) goto error; } else { diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h index 526a74e209d..e5875c51711 100644 --- a/src/gallium/include/winsys/radeon_winsys.h +++ b/src/gallium/include/winsys/radeon_winsys.h @@ -63,6 +63,7 @@ enum radeon_bo_flag */ RADEON_FLAG_DISCARDABLE = (1 << 10), RADEON_FLAG_WINSYS_SLAB_BACKING = (1 << 11), /* only used by the winsys */ + RADEON_FLAG_GFX12_ALLOW_DCC = (1 << 12), /* allow DCC, VRAM only */ }; static inline void @@ -87,6 +88,8 @@ si_res_print_flags(enum radeon_bo_flag flags) { fprintf(stderr, "DRIVER_INTERNAL "); if (flags & RADEON_FLAG_DISCARDABLE) fprintf(stderr, "DISCARDABLE "); + if (flags & RADEON_FLAG_GFX12_ALLOW_DCC) + fprintf(stderr, "GFX12_ALLOW_DCC "); } enum radeon_map_flags @@ -815,6 +818,7 @@ radeon_bo_drop_reference(struct radeon_winsys *rws, struct pb_buffer_lean *dst) #define RADEON_HEAP_BIT_ENCRYPTED (1 << 3) /* both VRAM and GTT */ #define 
RADEON_HEAP_BIT_NO_CPU_ACCESS (1 << 4) /* VRAM only */ +#define RADEON_HEAP_BIT_GFX12_ALLOW_DCC (1 << 5) /* VRAM only */ #define RADEON_HEAP_BIT_WC (1 << 4) /* GTT only, VRAM implies this to be true */ @@ -848,6 +852,8 @@ static inline unsigned radeon_flags_from_heap(int heap) flags |= RADEON_FLAG_GTT_WC; if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS) flags |= RADEON_FLAG_NO_CPU_ACCESS; + if (heap & RADEON_HEAP_BIT_GFX12_ALLOW_DCC) + flags |= RADEON_FLAG_GFX12_ALLOW_DCC; } else { /* GTT only */ if (heap & RADEON_HEAP_BIT_WC) @@ -878,6 +884,7 @@ static void radeon_canonicalize_bo_flags(enum radeon_bo_domain *_domain, break; case RADEON_DOMAIN_GTT: flags &= ~RADEON_FLAG_NO_CPU_ACCESS; + flags &= ~RADEON_FLAG_GFX12_ALLOW_DCC; break; case RADEON_DOMAIN_GDS: case RADEON_DOMAIN_OA: @@ -923,6 +930,8 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo heap |= RADEON_HEAP_BIT_VRAM; if (flags & RADEON_FLAG_NO_CPU_ACCESS) heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS; + if (flags & RADEON_FLAG_GFX12_ALLOW_DCC) + heap |= RADEON_HEAP_BIT_GFX12_ALLOW_DCC; /* RADEON_FLAG_WC is ignored and implied to be true for VRAM */ } else if (domain == RADEON_DOMAIN_GTT) { /* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. 
*/ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 8cd8c5da5c8..5668a2a9be5 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -559,6 +559,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *aws, } } + if (flags & RADEON_FLAG_GFX12_ALLOW_DCC) + request.flags |= AMDGPU_GEM_CREATE_GFX12_DCC; + r = amdgpu_bo_alloc(aws->dev, &request, &buf_handle); if (r) { fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); @@ -1562,6 +1565,8 @@ static struct pb_buffer_lean *amdgpu_bo_from_handle(struct radeon_winsys *rws, flags |= RADEON_FLAG_ENCRYPTED; *((bool*)&rws->uses_secure_bos) = true; } + if (info.alloc_flags & AMDGPU_GEM_CREATE_GFX12_DCC) + flags |= RADEON_FLAG_GFX12_ALLOW_DCC; /* Initialize the structure. */ pipe_reference_init(&bo->b.base.reference, 1);