mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 11:30:11 +01:00
radeonsi/gfx12: add DCC
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29510>
This commit is contained in:
parent
7232995fb5
commit
3d05d86d88
6 changed files with 73 additions and 14 deletions
|
|
@ -294,7 +294,8 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture
|
||||||
},
|
},
|
||||||
.is_stencil = is_stencil,
|
.is_stencil = is_stencil,
|
||||||
.dcc_enabled =
|
.dcc_enabled =
|
||||||
!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level),
|
!(access & SI_IMAGE_ACCESS_DCC_OFF) &&
|
||||||
|
(tex->buffer.flags & RADEON_FLAG_GFX12_ALLOW_DCC || vi_dcc_enabled(tex, first_level)),
|
||||||
.tc_compat_htile_enabled =
|
.tc_compat_htile_enabled =
|
||||||
sscreen->info.gfx_level < GFX12 &&
|
sscreen->info.gfx_level < GFX12 &&
|
||||||
vi_tc_compat_htile_enabled(tex, first_level, is_stencil ? PIPE_MASK_S : PIPE_MASK_Z),
|
vi_tc_compat_htile_enabled(tex, first_level, is_stencil ? PIPE_MASK_S : PIPE_MASK_Z),
|
||||||
|
|
|
||||||
|
|
@ -1792,6 +1792,9 @@ si_shader_selector_reference(struct si_context *sctx, /* sctx can optionally be
|
||||||
|
|
||||||
static inline bool vi_dcc_enabled(struct si_texture *tex, unsigned level)
|
static inline bool vi_dcc_enabled(struct si_texture *tex, unsigned level)
|
||||||
{
|
{
|
||||||
|
/* Gfx12 always returns false because DCC is transparent to the driver.
|
||||||
|
* I think DCC doesn't have to be disabled if a color buffer is simultaneously bound as a sampler.
|
||||||
|
*/
|
||||||
return !tex->is_depth && tex->surface.meta_offset && level < tex->surface.num_meta_levels;
|
return !tex->is_depth && tex->surface.meta_offset && level < tex->surface.num_meta_levels;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -113,9 +113,15 @@ static bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_textur
|
||||||
uint64_t linear_address = linear == ssrc ? src_address : dst_address;
|
uint64_t linear_address = linear == ssrc ? src_address : dst_address;
|
||||||
struct radeon_cmdbuf *cs = sctx->sdma_cs;
|
struct radeon_cmdbuf *cs = sctx->sdma_cs;
|
||||||
assert(tiled->buffer.b.b.depth0 == 1);
|
assert(tiled->buffer.b.b.depth0 == 1);
|
||||||
bool dcc = false;
|
bool dcc;
|
||||||
|
|
||||||
if (is_v7) {
|
if (is_v7) {
|
||||||
|
/* Compress only when dst has DCC. If src has DCC, it automatically decompresses according
|
||||||
|
* to PTE.D (page table bit) even if we don't enable DCC in the packet.
|
||||||
|
*/
|
||||||
|
dcc = tiled == sdst &&
|
||||||
|
tiled->buffer.flags & RADEON_FLAG_GFX12_ALLOW_DCC;
|
||||||
|
|
||||||
/* Check if everything fits into the bitfields */
|
/* Check if everything fits into the bitfields */
|
||||||
if (!(tiled_width <= (1 << 16) && tiled_height <= (1 << 16) &&
|
if (!(tiled_width <= (1 << 16) && tiled_height <= (1 << 16) &&
|
||||||
linear_pitch <= (1 << 16) && linear_slice_pitch <= (1ull << 32) &&
|
linear_pitch <= (1 << 16) && linear_slice_pitch <= (1ull << 32) &&
|
||||||
|
|
@ -160,21 +166,30 @@ static bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_textur
|
||||||
radeon_emit(0);
|
radeon_emit(0);
|
||||||
|
|
||||||
if (dcc) {
|
if (dcc) {
|
||||||
unsigned hw_fmt = ac_get_cb_format(sctx->gfx_level, tiled->buffer.b.b.format);
|
unsigned data_format = ac_get_cb_format(sctx->gfx_level, tiled->buffer.b.b.format);
|
||||||
unsigned hw_type = ac_get_cb_number_type(tiled->buffer.b.b.format);
|
unsigned number_type = ac_get_cb_number_type(tiled->buffer.b.b.format);
|
||||||
uint64_t md_address = tiled_address + tiled->surface.meta_offset;
|
uint64_t md_address = tiled_address + tiled->surface.meta_offset;
|
||||||
|
|
||||||
|
if (is_v7) {
|
||||||
|
radeon_emit(data_format |
|
||||||
|
number_type << 9) |
|
||||||
|
(2 << 16) | /* 0: bypass DCC, 2: decompress reads if PTE.D */
|
||||||
|
(1 << 18) | /* 0: bypass DCC, 1: write compressed if PTE.D, 2: write uncompressed if PTE.D */
|
||||||
|
(tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24) |
|
||||||
|
(1 << 26); /* max uncompressed block size: 256B */
|
||||||
|
} else {
|
||||||
/* Add metadata */
|
/* Add metadata */
|
||||||
radeon_emit((uint32_t)md_address);
|
radeon_emit((uint32_t)md_address);
|
||||||
radeon_emit((uint32_t)(md_address >> 32));
|
radeon_emit((uint32_t)(md_address >> 32));
|
||||||
radeon_emit(hw_fmt |
|
radeon_emit(data_format |
|
||||||
ac_alpha_is_on_msb(&sctx->screen->info, tiled->buffer.b.b.format) << 8 |
|
ac_alpha_is_on_msb(&sctx->screen->info, tiled->buffer.b.b.format) << 8 |
|
||||||
hw_type << 9 |
|
number_type << 9 |
|
||||||
tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
|
tiled->surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
|
||||||
V_028C78_MAX_BLOCK_SIZE_256B << 26 |
|
V_028C78_MAX_BLOCK_SIZE_256B << 26 |
|
||||||
tmz << 29 |
|
tmz << 29 |
|
||||||
tiled->surface.u.gfx9.color.dcc.pipe_aligned << 31);
|
tiled->surface.u.gfx9.color.dcc.pipe_aligned << 31);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
radeon_end();
|
radeon_end();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -225,6 +225,24 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac
|
||||||
ptex->flags & PIPE_RESOURCE_FLAG_SPARSE)
|
ptex->flags & PIPE_RESOURCE_FLAG_SPARSE)
|
||||||
flags |= RADEON_SURF_NO_HTILE;
|
flags |= RADEON_SURF_NO_HTILE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* TODO: Set these for scanout after display DCC is enabled. The reason these are not set is
|
||||||
|
* because they overlap DCC_OFFSET_256B and the kernel driver incorrectly reads DCC_OFFSET_256B
|
||||||
|
* on GFX12, which completely breaks the display code.
|
||||||
|
*/
|
||||||
|
if (!is_imported && !(ptex->bind & PIPE_BIND_SCANOUT)) {
|
||||||
|
enum pipe_format format = util_format_get_depth_only(ptex->format);
|
||||||
|
|
||||||
|
/* These should be set for both color and Z/S. */
|
||||||
|
surface->u.gfx9.color.dcc_number_type = ac_get_cb_number_type(format);
|
||||||
|
surface->u.gfx9.color.dcc_data_format = ac_get_cb_format(sscreen->info.gfx_level, format);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (surface->modifier == DRM_FORMAT_MOD_INVALID &&
|
||||||
|
(ptex->bind & PIPE_BIND_CONST_BW ||
|
||||||
|
sscreen->debug_flags & DBG(NO_DCC) ||
|
||||||
|
(ptex->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_DCC))))
|
||||||
|
flags |= RADEON_SURF_DISABLE_DCC;
|
||||||
} else {
|
} else {
|
||||||
/* Gfx6-11 */
|
/* Gfx6-11 */
|
||||||
if (!is_flushed_depth && is_depth) {
|
if (!is_flushed_depth && is_depth) {
|
||||||
|
|
@ -1028,6 +1046,14 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
|
||||||
/* Create the backing buffer. */
|
/* Create the backing buffer. */
|
||||||
si_init_resource_fields(sscreen, resource, alloc_size, alignment);
|
si_init_resource_fields(sscreen, resource, alloc_size, alignment);
|
||||||
|
|
||||||
|
/* GFX12: Image descriptors always set COMPRESSION_EN=1, so this is the only thing that
|
||||||
|
* disables DCC in the driver.
|
||||||
|
*/
|
||||||
|
if (sscreen->info.gfx_level >= GFX12 &&
|
||||||
|
resource->domains & RADEON_DOMAIN_VRAM &&
|
||||||
|
surface->u.gfx9.gfx12_enable_dcc)
|
||||||
|
resource->flags |= RADEON_FLAG_GFX12_ALLOW_DCC;
|
||||||
|
|
||||||
if (!si_alloc_resource(sscreen, resource))
|
if (!si_alloc_resource(sscreen, resource))
|
||||||
goto error;
|
goto error;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,7 @@ enum radeon_bo_flag
|
||||||
*/
|
*/
|
||||||
RADEON_FLAG_DISCARDABLE = (1 << 10),
|
RADEON_FLAG_DISCARDABLE = (1 << 10),
|
||||||
RADEON_FLAG_WINSYS_SLAB_BACKING = (1 << 11), /* only used by the winsys */
|
RADEON_FLAG_WINSYS_SLAB_BACKING = (1 << 11), /* only used by the winsys */
|
||||||
|
RADEON_FLAG_GFX12_ALLOW_DCC = (1 << 12), /* allow DCC, VRAM only */
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
|
|
@ -87,6 +88,8 @@ si_res_print_flags(enum radeon_bo_flag flags) {
|
||||||
fprintf(stderr, "DRIVER_INTERNAL ");
|
fprintf(stderr, "DRIVER_INTERNAL ");
|
||||||
if (flags & RADEON_FLAG_DISCARDABLE)
|
if (flags & RADEON_FLAG_DISCARDABLE)
|
||||||
fprintf(stderr, "DISCARDABLE ");
|
fprintf(stderr, "DISCARDABLE ");
|
||||||
|
if (flags & RADEON_FLAG_GFX12_ALLOW_DCC)
|
||||||
|
fprintf(stderr, "GFX12_ALLOW_DCC ");
|
||||||
}
|
}
|
||||||
|
|
||||||
enum radeon_map_flags
|
enum radeon_map_flags
|
||||||
|
|
@ -815,6 +818,7 @@ radeon_bo_drop_reference(struct radeon_winsys *rws, struct pb_buffer_lean *dst)
|
||||||
#define RADEON_HEAP_BIT_ENCRYPTED (1 << 3) /* both VRAM and GTT */
|
#define RADEON_HEAP_BIT_ENCRYPTED (1 << 3) /* both VRAM and GTT */
|
||||||
|
|
||||||
#define RADEON_HEAP_BIT_NO_CPU_ACCESS (1 << 4) /* VRAM only */
|
#define RADEON_HEAP_BIT_NO_CPU_ACCESS (1 << 4) /* VRAM only */
|
||||||
|
#define RADEON_HEAP_BIT_GFX12_ALLOW_DCC (1 << 5) /* VRAM only */
|
||||||
|
|
||||||
#define RADEON_HEAP_BIT_WC (1 << 4) /* GTT only, VRAM implies this to be true */
|
#define RADEON_HEAP_BIT_WC (1 << 4) /* GTT only, VRAM implies this to be true */
|
||||||
|
|
||||||
|
|
@ -848,6 +852,8 @@ static inline unsigned radeon_flags_from_heap(int heap)
|
||||||
flags |= RADEON_FLAG_GTT_WC;
|
flags |= RADEON_FLAG_GTT_WC;
|
||||||
if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS)
|
if (heap & RADEON_HEAP_BIT_NO_CPU_ACCESS)
|
||||||
flags |= RADEON_FLAG_NO_CPU_ACCESS;
|
flags |= RADEON_FLAG_NO_CPU_ACCESS;
|
||||||
|
if (heap & RADEON_HEAP_BIT_GFX12_ALLOW_DCC)
|
||||||
|
flags |= RADEON_FLAG_GFX12_ALLOW_DCC;
|
||||||
} else {
|
} else {
|
||||||
/* GTT only */
|
/* GTT only */
|
||||||
if (heap & RADEON_HEAP_BIT_WC)
|
if (heap & RADEON_HEAP_BIT_WC)
|
||||||
|
|
@ -878,6 +884,7 @@ static void radeon_canonicalize_bo_flags(enum radeon_bo_domain *_domain,
|
||||||
break;
|
break;
|
||||||
case RADEON_DOMAIN_GTT:
|
case RADEON_DOMAIN_GTT:
|
||||||
flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
|
flags &= ~RADEON_FLAG_NO_CPU_ACCESS;
|
||||||
|
flags &= ~RADEON_FLAG_GFX12_ALLOW_DCC;
|
||||||
break;
|
break;
|
||||||
case RADEON_DOMAIN_GDS:
|
case RADEON_DOMAIN_GDS:
|
||||||
case RADEON_DOMAIN_OA:
|
case RADEON_DOMAIN_OA:
|
||||||
|
|
@ -923,6 +930,8 @@ static inline int radeon_get_heap_index(enum radeon_bo_domain domain, enum radeo
|
||||||
heap |= RADEON_HEAP_BIT_VRAM;
|
heap |= RADEON_HEAP_BIT_VRAM;
|
||||||
if (flags & RADEON_FLAG_NO_CPU_ACCESS)
|
if (flags & RADEON_FLAG_NO_CPU_ACCESS)
|
||||||
heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS;
|
heap |= RADEON_HEAP_BIT_NO_CPU_ACCESS;
|
||||||
|
if (flags & RADEON_FLAG_GFX12_ALLOW_DCC)
|
||||||
|
heap |= RADEON_HEAP_BIT_GFX12_ALLOW_DCC;
|
||||||
/* RADEON_FLAG_WC is ignored and implied to be true for VRAM */
|
/* RADEON_FLAG_WC is ignored and implied to be true for VRAM */
|
||||||
} else if (domain == RADEON_DOMAIN_GTT) {
|
} else if (domain == RADEON_DOMAIN_GTT) {
|
||||||
/* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */
|
/* GTT is implied by RADEON_HEAP_BIT_VRAM not being set. */
|
||||||
|
|
|
||||||
|
|
@ -559,6 +559,9 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *aws,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (flags & RADEON_FLAG_GFX12_ALLOW_DCC)
|
||||||
|
request.flags |= AMDGPU_GEM_CREATE_GFX12_DCC;
|
||||||
|
|
||||||
r = amdgpu_bo_alloc(aws->dev, &request, &buf_handle);
|
r = amdgpu_bo_alloc(aws->dev, &request, &buf_handle);
|
||||||
if (r) {
|
if (r) {
|
||||||
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
|
fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
|
||||||
|
|
@ -1562,6 +1565,8 @@ static struct pb_buffer_lean *amdgpu_bo_from_handle(struct radeon_winsys *rws,
|
||||||
flags |= RADEON_FLAG_ENCRYPTED;
|
flags |= RADEON_FLAG_ENCRYPTED;
|
||||||
*((bool*)&rws->uses_secure_bos) = true;
|
*((bool*)&rws->uses_secure_bos) = true;
|
||||||
}
|
}
|
||||||
|
if (info.alloc_flags & AMDGPU_GEM_CREATE_GFX12_DCC)
|
||||||
|
flags |= RADEON_FLAG_GFX12_ALLOW_DCC;
|
||||||
|
|
||||||
/* Initialize the structure. */
|
/* Initialize the structure. */
|
||||||
pipe_reference_init(&bo->b.base.reference, 1);
|
pipe_reference_init(&bo->b.base.reference, 1);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue