radeonsi/gfx12: add DCC

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29510>
This commit is contained in:
Marek Olšák 2024-05-31 22:36:03 -04:00 committed by Marge Bot
parent 7232995fb5
commit 3d05d86d88
6 changed files with 73 additions and 14 deletions

View file

@ -294,7 +294,8 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture
}, },
.is_stencil = is_stencil, .is_stencil = is_stencil,
.dcc_enabled = .dcc_enabled =
!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level), !(access & SI_IMAGE_ACCESS_DCC_OFF) &&
(tex->buffer.flags & RADEON_FLAG_GFX12_ALLOW_DCC || vi_dcc_enabled(tex, first_level)),
.tc_compat_htile_enabled = .tc_compat_htile_enabled =
sscreen->info.gfx_level < GFX12 && sscreen->info.gfx_level < GFX12 &&
vi_tc_compat_htile_enabled(tex, first_level, is_stencil ? PIPE_MASK_S : PIPE_MASK_Z), vi_tc_compat_htile_enabled(tex, first_level, is_stencil ? PIPE_MASK_S : PIPE_MASK_Z),

View file

@ -1792,6 +1792,9 @@ si_shader_selector_reference(struct si_context *sctx, /* sctx can optionally be
static inline bool vi_dcc_enabled(struct si_texture *tex, unsigned level) static inline bool vi_dcc_enabled(struct si_texture *tex, unsigned level)
{ {
/* Gfx12 always returns false because DCC is transparent to the driver.
* I think DCC doesn't have to be disabled if a color buffer is simultaneously bound as a sampler.
*/
return !tex->is_depth && tex->surface.meta_offset && level < tex->surface.num_meta_levels; return !tex->is_depth && tex->surface.meta_offset && level < tex->surface.num_meta_levels;
} }

View file

@ -113,9 +113,15 @@ static bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_textur
uint64_t linear_address = linear == ssrc ? src_address : dst_address; uint64_t linear_address = linear == ssrc ? src_address : dst_address;
struct radeon_cmdbuf *cs = sctx->sdma_cs; struct radeon_cmdbuf *cs = sctx->sdma_cs;
assert(tiled->buffer.b.b.depth0 == 1); assert(tiled->buffer.b.b.depth0 == 1);
bool dcc = false; bool dcc;
if (is_v7) { if (is_v7) {
/* Compress only when dst has DCC. If src has DCC, it automatically decompresses according
* to PTE.D (page table bit) even if we don't enable DCC in the packet.
*/
dcc = tiled == sdst &&
tiled->buffer.flags & RADEON_FLAG_GFX12_ALLOW_DCC;
/* Check if everything fits into the bitfields */ /* Check if everything fits into the bitfields */
if (!(tiled_width <= (1 << 16) && tiled_height <= (1 << 16) && if (!(tiled_width <= (1 << 16) && tiled_height <= (1 << 16) &&
linear_pitch <= (1 << 16) && linear_slice_pitch <= (1ull << 32) && linear_pitch <= (1 << 16) && linear_slice_pitch <= (1ull << 32) &&
@ -160,21 +166,30 @@ static bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_textur
radeon_emit(0); radeon_emit(0);
if (dcc) { if (dcc) {
unsigned hw_fmt = ac_get_cb_format(sctx->gfx_level, tiled->buffer.b.b.format); unsigned data_format = ac_get_cb_format(sctx->gfx_level, tiled->buffer.b.b.format);
unsigned hw_type = ac_get_cb_number_type(tiled->buffer.b.b.format); unsigned number_type = ac_get_cb_number_type(tiled->buffer.b.b.format);
uint64_t md_address = tiled_address + tiled->surface.meta_offset; uint64_t md_address = tiled_address + tiled->surface.meta_offset;
if (is_v7) {
/* SDMA v7 (gfx12) DCC control dword. Every field must be inside the radeon_emit() argument so
 * that the complete dword is emitted no matter how radeon_emit is defined; closing the
 * parenthesis after number_type would leave the remaining fields outside the emitted value
 * unless radeon_emit expands to a bare assignment.
 * NOTE(review): the definition of radeon_emit is not visible here - confirm.
 */
radeon_emit(data_format |
            (number_type << 9) |
            (2 << 16) | /* 0: bypass DCC, 2: decompress reads if PTE.D */
            (1 << 18) | /* 0: bypass DCC, 1: write compressed if PTE.D, 2: write uncompressed if PTE.D */
            (tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24) |
            (1 << 26)); /* max uncompressed block size: 256B */
} else {
/* Add metadata */ /* Add metadata */
radeon_emit((uint32_t)md_address); radeon_emit((uint32_t)md_address);
radeon_emit((uint32_t)(md_address >> 32)); radeon_emit((uint32_t)(md_address >> 32));
radeon_emit(hw_fmt | radeon_emit(data_format |
ac_alpha_is_on_msb(&sctx->screen->info, tiled->buffer.b.b.format) << 8 | ac_alpha_is_on_msb(&sctx->screen->info, tiled->buffer.b.b.format) << 8 |
hw_type << 9 | number_type << 9 |
tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24 | tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
V_028C78_MAX_BLOCK_SIZE_256B << 26 | V_028C78_MAX_BLOCK_SIZE_256B << 26 |
tmz << 29 | tmz << 29 |
tiled->surface.u.gfx9.color.dcc.pipe_aligned << 31); tiled->surface.u.gfx9.color.dcc.pipe_aligned << 31);
} }
}
radeon_end(); radeon_end();
return true; return true;
} }

View file

@ -225,6 +225,24 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac
ptex->flags & PIPE_RESOURCE_FLAG_SPARSE) ptex->flags & PIPE_RESOURCE_FLAG_SPARSE)
flags |= RADEON_SURF_NO_HTILE; flags |= RADEON_SURF_NO_HTILE;
} }
/* TODO: Set these for scanout after display DCC is enabled. They are not set for scanout yet
* because they overlap DCC_OFFSET_256B, and the kernel driver incorrectly reads DCC_OFFSET_256B
* on GFX12, which completely breaks the display code.
*/
if (!is_imported && !(ptex->bind & PIPE_BIND_SCANOUT)) {
enum pipe_format format = util_format_get_depth_only(ptex->format);
/* These should be set for both color and Z/S. */
surface->u.gfx9.color.dcc_number_type = ac_get_cb_number_type(format);
surface->u.gfx9.color.dcc_data_format = ac_get_cb_format(sscreen->info.gfx_level, format);
}
if (surface->modifier == DRM_FORMAT_MOD_INVALID &&
(ptex->bind & PIPE_BIND_CONST_BW ||
sscreen->debug_flags & DBG(NO_DCC) ||
(ptex->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_DCC))))
flags |= RADEON_SURF_DISABLE_DCC;
} else { } else {
/* Gfx6-11 */ /* Gfx6-11 */
if (!is_flushed_depth && is_depth) { if (!is_flushed_depth && is_depth) {
@ -1028,6 +1046,14 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
/* Create the backing buffer. */ /* Create the backing buffer. */
si_init_resource_fields(sscreen, resource, alloc_size, alignment); si_init_resource_fields(sscreen, resource, alloc_size, alignment);
/* GFX12: Image descriptors always set COMPRESSION_EN=1, so leaving
* RADEON_FLAG_GFX12_ALLOW_DCC unset on the buffer is the only thing that disables DCC in the driver.
*/
if (sscreen->info.gfx_level >= GFX12 &&
resource->domains & RADEON_DOMAIN_VRAM &&
surface->u.gfx9.gfx12_enable_dcc)
resource->flags |= RADEON_FLAG_GFX12_ALLOW_DCC;
if (!si_alloc_resource(sscreen, resource)) if (!si_alloc_resource(sscreen, resource))
goto error; goto error;
} else { } else {

View file

@ -63,6 +63,7 @@ enum radeon_bo_flag
*/ */
RADEON_FLAG_DISCARDABLE = (1 << 10), RADEON_FLAG_DISCARDABLE = (1 << 10),
RADEON_FLAG_WINSYS_SLAB_BACKING = (1 << 11), /* only used by the winsys */ RADEON_FLAG_WINSYS_SLAB_BACKING = (1 << 11), /* only used by the winsys */
RADEON_FLAG_GFX12_ALLOW_DCC = (1 << 12), /* allow DCC, VRAM only */
}; };
static inline void static inline void
@ -87,6 +88,8 @@ si_res_print_flags(enum radeon_bo_flag flags) {
fprintf(stderr, "DRIVER_INTERNAL "); fprintf(stderr, "DRIVER_INTERNAL ");
if (flags & RADEON_FLAG_DISCARDABLE) if (flags & RADEON_FLAG_DISCARDABLE)
fprintf(stderr, "DISCARDABLE "); fprintf(stderr, "DISCARDABLE ");
if (flags & RADEON_FLAG_GFX12_ALLOW_DCC)
fprintf(stderr, "GFX12_ALLOW_DCC ");
} }
enum radeon_map_flags enum radeon_map_flags
@ -815,6 +818,7 @@ radeon_bo_drop_reference(struct radeon_winsys *rws, struct pb_buffer_lean *dst)
#define RADEON_HEAP_BIT_ENCRYPTED (1 << 3) /* both VRAM and GTT */ #define RADEON_HEAP_BIT_ENCRYPTED (1 << 3) /* both VRAM and GTT */
#define RADEON_HEAP_BIT_NO_CPU_ACCESS (1 << 4) /* VRAM only */ #define RADEON_HEAP_BIT_NO_CPU_ACCESS (1 << 4) /* VRAM only */
#define RADEON_HEAP_BIT_GFX12_ALLOW_DCC (1 << 5) /* VRAM only */
#define RADEON_HEAP_BIT_WC (1 << 4) /* GTT only, VRAM implies this to be true */ #define RADEON_HEAP_BIT_WC (1 << 4) /* GTT only, VRAM implies this to be true */
@ -848,6 +852,8 @@ static inline unsigned radeon_flags_from_heap(int heap)
flags |= RADEON_FLAG_GTT_WC; flags |= RADEON_FLAG_GTT_WC;
if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS) if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS)
flags |= RADEON_FLAG_NO_CPU_ACCESS; flags |= RADEON_FLAG_NO_CPU_ACCESS;
if (heap & RADEON_HEAP_BIT_GFX12_ALLOW_DCC)
flags |= RADEON_FLAG_GFX12_ALLOW_DCC;
} else { } else {
/* GTT only */ /* GTT only */
if (heap & RADEON_HEAP_BIT_WC) if (heap & RADEON_HEAP_BIT_WC)
@ -878,6 +884,7 @@ static void radeon_canonicalize_bo_flags(enum radeon_bo_domain *_domain,
break; break;
case RADEON_DOMAIN_GTT: case RADEON_DOMAIN_GTT:
flags &= ~RADEON_FLAG_NO_CPU_ACCESS; flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
flags &= ~RADEON_FLAG_GFX12_ALLOW_DCC;
break; break;
case RADEON_DOMAIN_GDS: case RADEON_DOMAIN_GDS:
case RADEON_DOMAIN_OA: case RADEON_DOMAIN_OA:
@ -923,6 +930,8 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
heap |= RADEON_HEAP_BIT_VRAM; heap |= RADEON_HEAP_BIT_VRAM;
if (flags & RADEON_FLAG_NO_CPU_ACCESS) if (flags & RADEON_FLAG_NO_CPU_ACCESS)
heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS; heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS;
if (flags & RADEON_FLAG_GFX12_ALLOW_DCC)
heap |= RADEON_HEAP_BIT_GFX12_ALLOW_DCC;
/* RADEON_FLAG_WC is ignored and implied to be true for VRAM */ /* RADEON_FLAG_WC is ignored and implied to be true for VRAM */
} else if (domain == RADEON_DOMAIN_GTT) { } else if (domain == RADEON_DOMAIN_GTT) {
/* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */ /* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */

View file

@ -559,6 +559,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *aws,
} }
} }
if (flags & RADEON_FLAG_GFX12_ALLOW_DCC)
request.flags |= AMDGPU_GEM_CREATE_GFX12_DCC;
r = amdgpu_bo_alloc(aws->dev, &request, &buf_handle); r = amdgpu_bo_alloc(aws->dev, &request, &buf_handle);
if (r) { if (r) {
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n"); fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
@ -1562,6 +1565,8 @@ static struct pb_buffer_lean *amdgpu_bo_from_handle(struct radeon_winsys *rws,
flags |= RADEON_FLAG_ENCRYPTED; flags |= RADEON_FLAG_ENCRYPTED;
*((bool*)&rws->uses_secure_bos) = true; *((bool*)&rws->uses_secure_bos) = true;
} }
if (info.alloc_flags & AMDGPU_GEM_CREATE_GFX12_DCC)
flags |= RADEON_FLAG_GFX12_ALLOW_DCC;
/* Initialize the structure. */ /* Initialize the structure. */
pipe_reference_init(&bo->b.base.reference, 1); pipe_reference_init(&bo->b.base.reference, 1);