diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index c03206d00e3..459d40cfe56 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -854,10 +854,10 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config * ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut); if (ret == ADDR_OK) { - dcc_level->dcc_offset = surf->dcc_size; - surf->num_dcc_levels = level + 1; - surf->dcc_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize; - surf->dcc_alignment_log2 = MAX2(surf->dcc_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign)); + dcc_level->dcc_offset = surf->meta_size; + surf->num_meta_levels = level + 1; + surf->meta_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize; + surf->meta_alignment_log2 = MAX2(surf->meta_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign)); /* If the DCC size of a subresource (1 mip level or 1 slice) * is not aligned, the DCC memory layout is not contiguous for @@ -880,7 +880,7 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config * * provide this info. As DCC memory is linear (each * slice is the same size) it's easy to compute. 
*/ - surf->dcc_slice_size = AddrDccOut->dccRamSize / config->info.array_size; + surf->meta_slice_size = AddrDccOut->dccRamSize / config->info.array_size; /* For arrays, we have to compute the DCC info again * with one slice size to get a correct fast clear @@ -906,9 +906,9 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config * } if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS && - surf->dcc_slice_size != dcc_level->dcc_slice_fast_clear_size) { - surf->dcc_size = 0; - surf->num_dcc_levels = 0; + surf->meta_slice_size != dcc_level->dcc_slice_fast_clear_size) { + surf->meta_size = 0; + surf->num_meta_levels = 0; AddrDccOut->subLvlCompressible = false; } } else { @@ -933,10 +933,10 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config * ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut); if (ret == ADDR_OK) { - surf->htile_size = AddrHtileOut->htileBytes; - surf->htile_slice_size = AddrHtileOut->sliceSize; - surf->htile_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign); - surf->num_htile_levels = level + 1; + surf->meta_size = AddrHtileOut->htileBytes; + surf->meta_slice_size = AddrHtileOut->sliceSize; + surf->meta_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign); + surf->num_meta_levels = level + 1; } } @@ -1323,13 +1323,11 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *i } surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); - surf->num_dcc_levels = 0; + surf->num_meta_levels = 0; surf->surf_size = 0; - surf->dcc_size = 0; - surf->dcc_alignment_log2 = 0; - surf->htile_size = 0; - surf->htile_slice_size = 0; - surf->htile_alignment_log2 = 0; + surf->meta_size = 0; + surf->meta_slice_size = 0; + surf->meta_alignment_log2 = 0; const bool only_stencil = (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER); @@ -1468,7 +1466,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *i * This is what 
addrlib does, but calling addrlib would be a lot more * complicated. */ - if (surf->dcc_size && config->info.levels > 1) { + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) { /* The smallest miplevels that are never compressed by DCC * still read the DCC buffer via TC if the base level uses DCC, * and for some reason the DCC buffer needs to be larger if @@ -1477,22 +1475,22 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *i * * "dcc_alignment * 4" was determined by trial and error. */ - surf->dcc_size = align64(surf->surf_size >> 8, (1 << surf->dcc_alignment_log2) * 4); + surf->meta_size = align64(surf->surf_size >> 8, (1 << surf->meta_alignment_log2) * 4); } /* Make sure HTILE covers the whole miptree, because the shader reads * TC-compatible HTILE even for levels where it's disabled by DB. */ - if (surf->htile_size && config->info.levels > 1 && - surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) { + if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE && + surf->meta_size && config->info.levels > 1) { /* MSAA can't occur with levels > 1, so ignore the sample count. */ const unsigned total_pixels = surf->surf_size / surf->bpe; const unsigned htile_block_size = 8 * 8; const unsigned htile_element_size = 4; - surf->htile_size = (total_pixels / htile_block_size) * htile_element_size; - surf->htile_size = align(surf->htile_size, 1 << surf->htile_alignment_log2); - } else if (!surf->htile_size) { + surf->meta_size = (total_pixels / htile_block_size) * htile_element_size; + surf->meta_size = align(surf->meta_size, 1 << surf->meta_alignment_log2); + } else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && !surf->meta_size) { /* Unset this if HTILE is not present. 
*/ surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; } @@ -1800,10 +1798,10 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_ if (ret != ADDR_OK) return ret; - surf->htile_size = hout.htileBytes; - surf->htile_slice_size = hout.sliceSize; - surf->htile_alignment_log2 = util_logbase2(hout.baseAlign); - surf->num_htile_levels = in->numMipLevels; + surf->meta_size = hout.htileBytes; + surf->meta_slice_size = hout.sliceSize; + surf->meta_alignment_log2 = util_logbase2(hout.baseAlign); + surf->num_meta_levels = in->numMipLevels; for (unsigned i = 0; i < in->numMipLevels; i++) { surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset; @@ -1813,13 +1811,13 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_ /* GFX10 can only compress the first level * in the mip tail. */ - surf->num_htile_levels = i + 1; + surf->num_meta_levels = i + 1; break; } } - if (!surf->num_htile_levels) - surf->htile_size = 0; + if (!surf->num_meta_levels) + surf->meta_size = 0; return 0; } @@ -1891,10 +1889,10 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_ surf->u.gfx9.dcc_block_height = dout.compressBlkHeight; surf->u.gfx9.dcc_block_depth = dout.compressBlkDepth; surf->u.gfx9.dcc_pitch_max = dout.pitch - 1; - surf->dcc_size = dout.dccRamSize; - surf->dcc_slice_size = dout.dccRamSliceSize; - surf->dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign); - surf->num_dcc_levels = in->numMipLevels; + surf->meta_size = dout.dccRamSize; + surf->meta_slice_size = dout.dccRamSliceSize; + surf->meta_alignment_log2 = util_logbase2(dout.dccRamBaseAlign); + surf->num_meta_levels = in->numMipLevels; /* Disable DCC for levels that are in the mip tail. * @@ -1930,23 +1928,23 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_ * if there are no regressions. 
*/ if (info->chip_class >= GFX10) - surf->num_dcc_levels = i + 1; + surf->num_meta_levels = i + 1; else - surf->num_dcc_levels = i; + surf->num_meta_levels = i; break; } } - if (!surf->num_dcc_levels) - surf->dcc_size = 0; + if (!surf->num_meta_levels) + surf->meta_size = 0; - surf->u.gfx9.display_dcc_size = surf->dcc_size; - surf->u.gfx9.display_dcc_alignment_log2 = surf->dcc_alignment_log2; + surf->u.gfx9.display_dcc_size = surf->meta_size; + surf->u.gfx9.display_dcc_alignment_log2 = surf->meta_alignment_log2; surf->u.gfx9.display_dcc_pitch_max = surf->u.gfx9.dcc_pitch_max; /* Compute displayable DCC. */ if (((in->flags.display && info->use_display_dcc_with_retile_blit) || - ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_dcc_levels) { + ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_meta_levels) { /* Compute displayable DCC info. */ din.dccKeyFlags.pipeAligned = 0; din.dccKeyFlags.rbAligned = 0; @@ -1964,10 +1962,10 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_ surf->u.gfx9.display_dcc_size = dout.dccRamSize; surf->u.gfx9.display_dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign); surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1; - assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size); + assert(surf->u.gfx9.display_dcc_size <= surf->meta_size); surf->u.gfx9.dcc_retile_use_uint16 = - surf->u.gfx9.display_dcc_size <= UINT16_MAX + 1 && surf->dcc_size <= UINT16_MAX + 1; + surf->u.gfx9.display_dcc_size <= UINT16_MAX + 1 && surf->meta_size <= UINT16_MAX + 1; /* Align the retile map size to get more hash table hits and * decrease the maximum memory footprint when all retile maps @@ -2327,12 +2325,11 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_ surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType; surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); - surf->num_dcc_levels = 0; + surf->num_meta_levels = 0; surf->surf_size = 0; surf->fmask_size = 0; - 
surf->dcc_size = 0; - surf->htile_size = 0; - surf->htile_slice_size = 0; + surf->meta_size = 0; + surf->meta_slice_size = 0; surf->u.gfx9.surf_offset = 0; if (AddrSurfInfoIn.flags.stencil) surf->u.gfx9.stencil_offset = 0; @@ -2382,7 +2379,8 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_ return r; /* Display needs unaligned DCC. */ - if (surf->num_dcc_levels && + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + surf->num_meta_levels && (!is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.dcc.rb_aligned, surf->u.gfx9.dcc.pipe_aligned) || /* Don't set is_displayable if displayable DCC is missing. */ @@ -2395,7 +2393,7 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_ assert(!AddrSurfInfoIn.flags.display || surf->is_displayable); /* Validate that DCC is set up correctly. */ - if (surf->num_dcc_levels) { + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->num_meta_levels) { assert(is_dcc_supported_by_L2(info, surf)); if (AddrSurfInfoIn.flags.color) assert(is_dcc_supported_by_CB(info, surf->u.gfx9.swizzle_mode)); @@ -2415,15 +2413,15 @@ static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_ /* Validate that DCC is enabled if DCN can do it. */ if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) && AddrSurfInfoIn.flags.display && surf->bpe == 4) { - assert(surf->num_dcc_levels); + assert(surf->num_meta_levels); } /* Validate that non-scanout DCC is always enabled. */ if (!AddrSurfInfoIn.flags.display) - assert(surf->num_dcc_levels); + assert(surf->num_meta_levels); } - if (!surf->htile_size) { + if (!surf->meta_size) { /* Unset this if HTILE is not present. */ surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; } @@ -2506,15 +2504,7 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf surf->alignment_log2 = surf->surf_alignment_log2; /* Ensure the offsets are always 0 if not available. 
*/ - surf->dcc_offset = surf->display_dcc_offset = 0; - surf->fmask_offset = surf->cmask_offset = 0; - surf->htile_offset = 0; - - if (surf->htile_size) { - surf->htile_offset = align64(surf->total_size, 1 << surf->htile_alignment_log2); - surf->total_size = surf->htile_offset + surf->htile_size; - surf->alignment_log2 = MAX2(surf->alignment_log2, surf->htile_alignment_log2); - } + surf->meta_offset = surf->display_dcc_offset = surf->fmask_offset = surf->cmask_offset = 0; if (surf->fmask_size) { assert(config->info.samples >= 2); @@ -2533,21 +2523,22 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf if (surf->is_displayable) surf->flags |= RADEON_SURF_SCANOUT; - if (surf->dcc_size && + if (surf->meta_size && /* dcc_size is computed on GFX9+ only if it's displayable. */ (info->chip_class >= GFX9 || !get_display_flag(config, surf))) { /* It's better when displayable DCC is immediately after * the image due to hw-specific reasons. */ - if (info->chip_class >= GFX9 && surf->u.gfx9.dcc_retile_num_elements) { + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + info->chip_class >= GFX9 && surf->u.gfx9.dcc_retile_num_elements) { /* Add space for the displayable DCC buffer. */ surf->display_dcc_offset = align64(surf->total_size, 1 << surf->u.gfx9.display_dcc_alignment_log2); surf->total_size = surf->display_dcc_offset + surf->u.gfx9.display_dcc_size; } - surf->dcc_offset = align64(surf->total_size, 1 << surf->dcc_alignment_log2); - surf->total_size = surf->dcc_offset + surf->dcc_size; - surf->alignment_log2 = MAX2(surf->alignment_log2, surf->dcc_alignment_log2); + surf->meta_offset = align64(surf->total_size, 1 << surf->meta_alignment_log2); + surf->total_size = surf->meta_offset + surf->meta_size; + surf->alignment_log2 = MAX2(surf->alignment_log2, surf->meta_alignment_log2); } return 0; @@ -2556,9 +2547,12 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf /* This is meant to be used for disabling DCC. 
*/ void ac_surface_zero_dcc_fields(struct radeon_surf *surf) { - surf->dcc_offset = 0; + if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) + return; + + surf->meta_offset = 0; surf->display_dcc_offset = 0; - if (!surf->htile_offset && !surf->fmask_offset && !surf->cmask_offset) { + if (!surf->fmask_offset && !surf->cmask_offset) { surf->total_size = surf->surf_size; surf->alignment_log2 = surf->surf_alignment_log2; } @@ -2666,8 +2660,8 @@ void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_su if (info->chip_class >= GFX9) { uint64_t dcc_offset = 0; - if (surf->dcc_offset) { - dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->dcc_offset; + if (surf->meta_offset) { + dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->meta_offset; assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24)); } @@ -2767,11 +2761,11 @@ bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_s /* Read DCC information. */ switch (info->chip_class) { case GFX8: - surf->dcc_offset = (uint64_t)desc[7] << 8; + surf->meta_offset = (uint64_t)desc[7] << 8; break; case GFX9: - surf->dcc_offset = + surf->meta_offset = ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40); surf->u.gfx9.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]); surf->u.gfx9.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]); @@ -2783,7 +2777,7 @@ bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_s case GFX10: case GFX10_3: - surf->dcc_offset = + surf->meta_offset = ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16); surf->u.gfx9.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]); break; @@ -2815,18 +2809,18 @@ void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_s case GFX7: break; case GFX8: - desc[7] = surf->dcc_offset >> 8; + desc[7] = surf->meta_offset >> 8; break; case GFX9: - desc[7] = surf->dcc_offset >> 
8; + desc[7] = surf->meta_offset >> 8; desc[5] &= C_008F24_META_DATA_ADDRESS; - desc[5] |= S_008F24_META_DATA_ADDRESS(surf->dcc_offset >> 40); + desc[5] |= S_008F24_META_DATA_ADDRESS(surf->meta_offset >> 40); break; case GFX10: case GFX10_3: desc[6] &= C_00A018_META_DATA_ADDRESS_LO; - desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->dcc_offset >> 8); - desc[7] = surf->dcc_offset >> 16; + desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->meta_offset >> 8); + desc[7] = surf->meta_offset >> 16; break; default: assert(0); @@ -2938,14 +2932,12 @@ bool ac_surface_override_offset_stride(const struct radeon_info *info, struct ra offset >= UINT64_MAX - surf->total_size) return false; - if (surf->htile_offset) - surf->htile_offset += offset; + if (surf->meta_offset) + surf->meta_offset += offset; if (surf->fmask_offset) surf->fmask_offset += offset; if (surf->cmask_offset) surf->cmask_offset += offset; - if (surf->dcc_offset) - surf->dcc_offset += offset; if (surf->display_dcc_offset) surf->display_dcc_offset += offset; return true; @@ -2957,7 +2949,7 @@ unsigned ac_surface_get_nplanes(const struct radeon_surf *surf) return 1; else if (surf->display_dcc_offset) return 3; - else if (surf->dcc_offset) + else if (surf->meta_offset) return 2; else return 1; @@ -2979,10 +2971,10 @@ uint64_t ac_surface_get_plane_offset(enum chip_class chip_class, case 1: assert(!layer); return surf->display_dcc_offset ? - surf->display_dcc_offset : surf->dcc_offset; + surf->display_dcc_offset : surf->meta_offset; case 2: assert(!layer); - return surf->dcc_offset; + return surf->meta_offset; default: unreachable("Invalid plane index"); } @@ -3017,9 +3009,9 @@ uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf, return surf->surf_size; case 1: return surf->display_dcc_offset ? 
- surf->u.gfx9.display_dcc_size : surf->dcc_size; + surf->u.gfx9.display_dcc_size : surf->meta_size; case 2: - return surf->dcc_size; + return surf->meta_size; default: unreachable("Invalid plane index"); } @@ -3059,18 +3051,18 @@ void ac_surface_print_info(FILE *out, const struct radeon_info *info, surf->cmask_offset, surf->cmask_size, 1 << surf->cmask_alignment_log2); - if (surf->htile_offset) + if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset) fprintf(out, " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n", - surf->htile_offset, surf->htile_size, - 1 << surf->htile_alignment_log2); + surf->meta_offset, surf->meta_size, + 1 << surf->meta_alignment_log2); - if (surf->dcc_offset) + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset) fprintf(out, " DCC: offset=%" PRIu64 ", size=%u, " "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n", - surf->dcc_offset, surf->dcc_size, 1 << surf->dcc_alignment_log2, - surf->u.gfx9.display_dcc_pitch_max, surf->num_dcc_levels); + surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2, + surf->u.gfx9.display_dcc_pitch_max, surf->num_meta_levels); if (surf->u.gfx9.stencil_offset) fprintf(out, @@ -3112,14 +3104,14 @@ void ac_surface_print_info(FILE *out, const struct radeon_info *info, surf->cmask_offset, surf->cmask_size, 1 << surf->cmask_alignment_log2, surf->u.legacy.cmask_slice_tile_max); - if (surf->htile_offset) + if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset) fprintf(out, " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n", - surf->htile_offset, surf->htile_size, - 1 << surf->htile_alignment_log2); + surf->meta_offset, surf->meta_size, + 1 << surf->meta_alignment_log2); - if (surf->dcc_offset) + if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset) fprintf(out, " DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n", - surf->dcc_offset, surf->dcc_size, 1 << surf->dcc_alignment_log2); + surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2); if 
(surf->has_stencil) fprintf(out, " StencilLayout: tilesplit=%u\n", diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index b4964aa6c35..b574f2ce2e4 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -238,12 +238,11 @@ struct radeon_surf { uint8_t bpe : 5; /* Display, standard(thin), depth, render(rotated). AKA D,S,Z,R swizzle modes. */ uint8_t micro_tile_mode : 3; - /* Number of mipmap levels where DCC is enabled starting from level 0. + /* Number of mipmap levels where DCC or HTILE is enabled starting from level 0. * Non-zero levels may be disabled due to alignment constraints, but not * the first level. */ - uint8_t num_dcc_levels : 4; - uint8_t num_htile_levels : 4; + uint8_t num_meta_levels : 4; uint8_t is_linear : 1; uint8_t has_stencil : 1; /* This might be true even if micro_tile_mode isn't displayable or rotated. */ @@ -278,8 +277,7 @@ struct radeon_surf { /* Use (1 << log2) to compute the alignment. */ uint8_t surf_alignment_log2; uint8_t fmask_alignment_log2; - uint8_t dcc_alignment_log2; - uint8_t htile_alignment_log2; + uint8_t meta_alignment_log2; /* DCC or HTILE */ uint8_t cmask_alignment_log2; uint8_t alignment_log2; @@ -293,21 +291,17 @@ struct radeon_surf { uint64_t fmask_size; uint32_t fmask_slice_size; /* max 2^31 (16K * 16K * 8) */ - /* DCC and HTILE are very small. */ - uint32_t dcc_size; - uint32_t dcc_slice_size; - - uint32_t htile_size; - uint32_t htile_slice_size; + /* DCC and HTILE (they are very small) */ + uint32_t meta_size; + uint32_t meta_slice_size; uint32_t cmask_size; uint32_t cmask_slice_size; /* All buffers combined. 
*/ - uint64_t htile_offset; + uint64_t meta_offset; /* DCC or HTILE */ uint64_t fmask_offset; uint64_t cmask_offset; - uint64_t dcc_offset; uint64_t display_dcc_offset; uint64_t total_size; diff --git a/src/amd/common/ac_surface_modifier_test.c b/src/amd/common/ac_surface_modifier_test.c index 3ad38be9136..5b74fa07728 100644 --- a/src/amd/common/ac_surface_modifier_test.c +++ b/src/amd/common/ac_surface_modifier_test.c @@ -123,7 +123,7 @@ void generate_hash(struct ac_addrlib *ac_addrlib, _mesa_sha1_init(&ctx); _mesa_sha1_update(&ctx, &surf->total_size, sizeof(surf->total_size)); - _mesa_sha1_update(&ctx, &surf->dcc_offset, sizeof(surf->dcc_offset)); + _mesa_sha1_update(&ctx, &surf->meta_offset, sizeof(surf->meta_offset)); _mesa_sha1_update(&ctx, &surf->display_dcc_offset, sizeof(surf->display_dcc_offset)); _mesa_sha1_update(&ctx, &surf->u.gfx9.display_dcc_pitch_max, sizeof(surf->u.gfx9.display_dcc_pitch_max)); @@ -142,7 +142,7 @@ void generate_hash(struct ac_addrlib *ac_addrlib, input.pitchInElement = surf->u.gfx9.surf_pitch; ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT dcc_input = {0}; - if (surf->dcc_offset) { + if (surf->meta_offset) { dcc_input = get_addr_from_coord_base(addrlib, surf, entry->w, entry->h, entry->format, surf->u.gfx9.dcc.rb_aligned, @@ -172,7 +172,7 @@ void generate_hash(struct ac_addrlib *ac_addrlib, _mesa_sha1_update(&ctx, &output.addr, sizeof(output.addr)); - if (surf->dcc_offset) { + if (surf->meta_offset) { dcc_input.x = (x & INT_MAX) % entry->w; dcc_input.y = (y & INT_MAX) % entry->h; @@ -255,7 +255,6 @@ static void test_modifier(const struct radeon_info *info, int r = ac_compute_surface(addrlib, info, &config, RADEON_SURF_MODE_2D, &surf); assert(!r); - assert(surf.htile_offset == 0); assert(surf.cmask_offset == 0); assert(surf.fmask_offset == 0); @@ -316,17 +315,17 @@ static void test_modifier(const struct radeon_info *info, } expected_offset = align(expected_offset, dcc_align); - assert(surf.dcc_offset == expected_offset); + 
assert(surf.meta_offset == expected_offset); uint64_t dcc_size = block_count(dims[i][0], dims[i][1], elem_bits, block_bits, NULL, NULL) << (block_bits - 8); dcc_size = align64(dcc_size, dcc_align); - assert(surf.dcc_size == dcc_size); + assert(surf.meta_size == dcc_size); expected_offset += dcc_size; } else - assert(!surf.dcc_offset); + assert(!surf.meta_offset); assert(surf.total_size == expected_offset); diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 595b990ab11..bcc604f74ad 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5905,7 +5905,7 @@ radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, * expanded". */ /* Compute the size of all fast clearable DCC levels. */ - for (unsigned i = 0; i < image->planes[0].surface.num_dcc_levels; i++) { + for (unsigned i = 0; i < image->planes[0].surface.num_meta_levels; i++) { struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.dcc_level[i]; unsigned dcc_fast_clear_size = dcc_level->dcc_slice_fast_clear_size * image->info.array_size; @@ -5917,10 +5917,10 @@ radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, } /* Initialize the mipmap levels without DCC. 
*/ - if (size != image->planes[0].surface.dcc_size) { + if (size != image->planes[0].surface.meta_size) { flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, - image->offset + image->planes[0].surface.dcc_offset + size, - image->planes[0].surface.dcc_size - size, 0xffffffff); + image->offset + image->planes[0].surface.meta_offset + size, + image->planes[0].surface.meta_size - size, 0xffffffff); } } diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 757f18b55af..fa485c6089e 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -6306,7 +6306,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff .pipe_aligned = 1, }; - if (surf->dcc_offset) + if (surf->meta_offset) meta = surf->u.gfx9.dcc; cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) | @@ -6357,14 +6357,14 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff cb->cb_color_cmask = va >> 8; va = radv_buffer_get_va(iview->bo) + iview->image->offset; - va += surf->dcc_offset; + va += surf->meta_offset; if (radv_dcc_enabled(iview->image, iview->base_mip) && device->physical_device->rad_info.chip_class <= GFX8) va += plane->surface.u.legacy.dcc_level[iview->base_mip].dcc_offset; unsigned dcc_tile_swizzle = surf->tile_swizzle; - dcc_tile_swizzle &= ((1 << surf->dcc_alignment_log2) - 1) >> 8; + dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8; cb->cb_dcc_base = va >> 8; cb->cb_dcc_base |= dcc_tile_swizzle; @@ -6619,7 +6619,7 @@ radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1); } - va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->htile_offset; + va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->meta_offset; ds->db_htile_data_base = va >> 8; ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); @@ -6684,7 +6684,7 @@ 
radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_inf ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); } - va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->htile_offset; + va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->meta_offset; ds->db_htile_data_base = va >> 8; ds->db_htile_surface = S_028ABC_FULL_CACHE(1); diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 445d87ad8c3..650f8ecdef7 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -700,15 +700,15 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im state[6] &= C_008F28_COMPRESSION_EN; state[7] = 0; if (!disable_compression && radv_dcc_enabled(image, first_level)) { - meta_va = gpu_address + plane->surface.dcc_offset; + meta_va = gpu_address + plane->surface.meta_offset; if (chip_class <= GFX8) meta_va += plane->surface.u.legacy.dcc_level[base_level].dcc_offset; unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8; - dcc_tile_swizzle &= (1 << plane->surface.dcc_alignment_log2) - 1; + dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1; meta_va |= dcc_tile_swizzle; } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) { - meta_va = gpu_address + plane->surface.htile_offset; + meta_va = gpu_address + plane->surface.meta_offset; } if (meta_va) { @@ -735,7 +735,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im .pipe_aligned = 1, }; - if (plane->surface.dcc_offset) + if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER)) meta = plane->surface.u.gfx9.dcc; if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression) @@ -766,7 +766,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *im .pipe_aligned = 1, }; - if (plane->surface.dcc_offset) + if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER)) meta = plane->surface.u.gfx9.dcc; 
state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | @@ -1055,7 +1055,8 @@ si_make_texture_descriptor(struct radv_device *device, struct radv_image *image, state[4] |= S_008F20_DEPTH(depth - 1); state[5] |= S_008F24_LAST_ARRAY(last_layer); } - if (image->planes[0].surface.dcc_offset) { + if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && + image->planes[0].surface.meta_offset) { state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format)); } else { /* The last dword is unused by hw. The shader uses it to clear @@ -1213,7 +1214,7 @@ radv_init_metadata(struct radv_device *device, struct radv_image *image, if (device->physical_device->rad_info.chip_class >= GFX9) { uint64_t dcc_offset = image->offset + - (surface->display_dcc_offset ? surface->display_dcc_offset : surface->dcc_offset); + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset); metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode; metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8; metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.display_dcc_pitch_max; @@ -1338,7 +1339,7 @@ radv_image_init_retile_map(struct radv_device *device, struct radv_image *image) image->retile_map = NULL; if (!radv_image_has_dcc(image) || !image->planes[0].surface.display_dcc_offset || - image->planes[0].surface.display_dcc_offset == image->planes[0].surface.dcc_offset) + image->planes[0].surface.display_dcc_offset == image->planes[0].surface.meta_offset) return VK_SUCCESS; uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface); diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index ce5dce8de6e..2abc1fc40b2 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -1275,19 +1275,19 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, radv_update_dcc_metadata(cmd_buffer, image, range, true); for (uint32_t l = 0; l < level_count; l++) { - 
uint64_t offset = image->offset + image->planes[0].surface.dcc_offset; + uint64_t offset = image->offset + image->planes[0].surface.meta_offset; uint32_t level = range->baseMipLevel + l; uint64_t size; if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { /* DCC for mipmaps+layers is currently disabled. */ - offset += image->planes[0].surface.dcc_slice_size * range->baseArrayLayer + + offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer + image->planes[0].surface.u.gfx9.meta_levels[level].offset; size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count; } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { /* Mipmap levels and layers aren't implemented. */ assert(level == 0); - size = image->planes[0].surface.dcc_size; + size = image->planes[0].surface.meta_size; } else { const struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.dcc_level[level]; @@ -1329,7 +1329,7 @@ radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im /* Clear individuals levels separately. 
*/ for (uint32_t l = 0; l < level_count; l++) { uint32_t level = range->baseMipLevel + l; - uint64_t offset = image->offset + image->planes[0].surface.htile_offset + + uint64_t offset = image->offset + image->planes[0].surface.meta_offset + image->planes[0].surface.u.gfx9.meta_levels[level].offset; uint32_t size = image->planes[0].surface.u.gfx9.meta_levels[level].size; @@ -1348,9 +1348,9 @@ radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im } } else { unsigned layer_count = radv_get_layerCount(image, range); - uint64_t size = image->planes[0].surface.htile_slice_size * layer_count; - uint64_t offset = image->offset + image->planes[0].surface.htile_offset + - image->planes[0].surface.htile_slice_size * range->baseArrayLayer; + uint64_t size = image->planes[0].surface.meta_slice_size * layer_count; + uint64_t offset = image->offset + image->planes[0].surface.meta_offset + + image->planes[0].surface.meta_slice_size * range->baseArrayLayer; if (htile_mask == UINT_MAX) { /* Clear the whole HTILE buffer. 
*/ diff --git a/src/amd/vulkan/radv_meta_dcc_retile.c b/src/amd/vulkan/radv_meta_dcc_retile.c index d2c2466c461..ece65e94081 100644 --- a/src/amd/vulkan/radv_meta_dcc_retile.c +++ b/src/amd/vulkan/radv_meta_dcc_retile.c @@ -234,8 +234,8 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image) &(VkBufferViewCreateInfo){ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, .buffer = radv_buffer_to_handle(&buffer), - .offset = image->planes[0].surface.dcc_offset, - .range = image->planes[0].surface.dcc_size, + .offset = image->planes[0].surface.meta_offset, + .range = image->planes[0].surface.meta_size, .format = VK_FORMAT_R8_UINT, }); radv_buffer_view_init(views + 2, cmd_buffer->device, diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 15ae7ebb36c..8682dad2a4c 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1885,7 +1885,8 @@ radv_image_has_fmask(const struct radv_image *image) static inline bool radv_image_has_dcc(const struct radv_image *image) { - return image->planes[0].surface.dcc_offset; + return !(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) && + image->planes[0].surface.meta_offset; } /** @@ -1903,7 +1904,7 @@ radv_image_is_tc_compat_cmask(const struct radv_image *image) static inline bool radv_dcc_enabled(const struct radv_image *image, unsigned level) { - return radv_image_has_dcc(image) && level < image->planes[0].surface.num_dcc_levels; + return radv_image_has_dcc(image) && level < image->planes[0].surface.num_meta_levels; } /** @@ -1921,7 +1922,8 @@ radv_image_has_CB_metadata(const struct radv_image *image) static inline bool radv_image_has_htile(const struct radv_image *image) { - return image->planes[0].surface.htile_size; + return image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER && + image->planes[0].surface.meta_size; } /** @@ -1930,7 +1932,7 @@ radv_image_has_htile(const struct radv_image *image) static inline bool radv_htile_enabled(const 
struct radv_image *image, unsigned level) { - return radv_image_has_htile(image) && level < image->planes[0].surface.num_htile_levels; + return radv_image_has_htile(image) && level < image->planes[0].surface.num_meta_levels; } /** diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 18fac37cb8f..d24520bb597 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -757,7 +757,7 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; unsigned num_pipes = rscreen->info.num_tile_pipes; - rtex->surface.htile_size = 0; + rtex->surface.meta_size = 0; if (rscreen->chip_class <= EVERGREEN && rscreen->info.drm_minor < 26) @@ -804,8 +804,8 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; base_align = num_pipes * pipe_interleave_bytes; - rtex->surface.htile_alignment_log2 = util_logbase2(base_align); - rtex->surface.htile_size = + rtex->surface.meta_alignment_log2 = util_logbase2(base_align); + rtex->surface.meta_size = util_num_layers(&rtex->resource.b.b, 0) * align(slice_bytes, base_align); } @@ -815,11 +815,11 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, { r600_texture_get_htile_size(rscreen, rtex); - if (!rtex->surface.htile_size) + if (!rtex->surface.meta_size) return; - rtex->htile_offset = align(rtex->size, 1 << rtex->surface.htile_alignment_log2); - rtex->size = rtex->htile_offset + rtex->surface.htile_size; + rtex->htile_offset = align(rtex->size, 1 << rtex->surface.meta_alignment_log2); + rtex->size = rtex->htile_offset + rtex->surface.meta_size; } void r600_print_texture_info(struct r600_common_screen *rscreen, @@ -861,8 +861,8 @@ void r600_print_texture_info(struct r600_common_screen *rscreen, if (rtex->htile_offset) u_log_printf(log, " HTile: 
offset=%"PRIu64", size=%u " "alignment=%u\n", - rtex->htile_offset, rtex->surface.htile_size, - 1 << rtex->surface.htile_alignment_log2); + rtex->htile_offset, rtex->surface.meta_size, + 1 << rtex->surface.meta_alignment_log2); for (i = 0; i <= rtex->resource.b.b.last_level; i++) u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", " @@ -1000,7 +1000,7 @@ r600_texture_create_object(struct pipe_screen *screen, r600_screen_clear_buffer(rscreen, &rtex->resource.b.b, rtex->htile_offset, - rtex->surface.htile_size, + rtex->surface.meta_size, clear_value); } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 8c1dca6c807..9cf477f1b00 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1319,10 +1319,12 @@ void si_flush_implicit_resources(struct si_context *sctx) void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex) { + assert(!tex->is_depth); + /* If graphics is disabled, we can't decompress DCC, but it shouldn't * be compressed either. The caller should simply discard it. */ - if (!tex->surface.dcc_offset || !sctx->has_graphics) + if (!tex->surface.meta_offset || !sctx->has_graphics) return; if (sctx->chip_class == GFX8) { @@ -1332,7 +1334,7 @@ void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex) struct pipe_resource *ptex = &tex->buffer.b.b; /* DCC decompression using a compute shader. */ - for (unsigned level = 0; level < tex->surface.num_dcc_levels; level++) { + for (unsigned level = 0; level < tex->surface.num_meta_levels; level++) { struct pipe_box box; u_box_3d(0, 0, 0, u_minify(ptex->width0, level), @@ -1341,7 +1343,7 @@ void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex) si_compute_copy_image(sctx, ptex, level, ptex, level, 0, 0, 0, &box, true, /* Sync before the first copy and after the last copy */ (level == 0 ? SI_OP_SYNC_BEFORE : 0) | - (level == tex->surface.num_dcc_levels - 1 ? 
SI_OP_SYNC_AFTER : 0)); + (level == tex->surface.num_meta_levels - 1 ? SI_OP_SYNC_AFTER : 0)); } /* Now clear DCC metadata to uncompressed. @@ -1352,8 +1354,8 @@ void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex) * dEQP-GLES31.functional.image_load_store.2d.format_reinterpret.rgba32f_rgba32i */ uint32_t clear_value = DCC_UNCOMPRESSED; - si_clear_buffer(sctx, ptex, tex->surface.dcc_offset, - tex->surface.dcc_size, &clear_value, 4, SI_OP_SYNC_AFTER, + si_clear_buffer(sctx, ptex, tex->surface.meta_offset, + tex->surface.meta_size, &clear_value, 4, SI_OP_SYNC_AFTER, SI_COHERENCY_CB_META, SI_COMPUTE_CLEAR_METHOD); } } diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index e8b3523b6a9..5cbfa50714c 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -276,7 +276,7 @@ bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsi dcc_offset = 0; } else { dcc_buffer = &tex->buffer.b.b; - dcc_offset = tex->surface.dcc_offset; + dcc_offset = tex->surface.meta_offset; } if (sctx->chip_class >= GFX9) { @@ -289,7 +289,7 @@ bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsi if (tex->buffer.b.b.nr_storage_samples >= 4) return false; - clear_size = tex->surface.dcc_size; + clear_size = tex->surface.meta_size; } else { unsigned num_layers = util_num_layers(&tex->buffer.b.b, level); @@ -683,8 +683,8 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers, uint32_t clear_value = (zstex->surface.has_stencil && !zstex->htile_stencil_disabled) || sctx->chip_class == GFX8 ? 
0xfffff30f : 0xfffc000f; - si_clear_buffer(sctx, &zstex->buffer.b.b, zstex->surface.htile_offset, - zstex->surface.htile_size, &clear_value, 4, + si_clear_buffer(sctx, &zstex->buffer.b.b, zstex->surface.meta_offset, + zstex->surface.meta_size, &clear_value, 4, SI_OP_SYNC_BEFORE_AFTER, SI_COHERENCY_DB_META, SI_AUTO_SELECT_CLEAR_METHOD); } diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 57f64afb934..008a3756908 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -602,7 +602,7 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex) struct pipe_image_view img[3]; assert(tex->dcc_retile_buffer); - assert(tex->surface.dcc_offset && tex->surface.dcc_offset <= UINT_MAX); + assert(tex->surface.meta_offset && tex->surface.meta_offset <= UINT_MAX); assert(tex->surface.display_dcc_offset && tex->surface.display_dcc_offset <= UINT_MAX); for (unsigned i = 0; i < 3; i++) { @@ -616,8 +616,8 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex) img[0].u.buf.size = ac_surface_get_retile_map_size(&tex->surface); img[1].format = PIPE_FORMAT_R8_UINT; - img[1].u.buf.offset = tex->surface.dcc_offset; - img[1].u.buf.size = tex->surface.dcc_size; + img[1].u.buf.offset = tex->surface.meta_offset; + img[1].u.buf.size = tex->surface.meta_size; img[2].format = PIPE_FORMAT_R8_UINT; img[2].u.buf.offset = tex->surface.display_dcc_offset; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 5602f23f01b..416e11c6c31 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -332,7 +332,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture if (!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level)) { meta_va = - (!tex->dcc_separate_buffer ? 
tex->buffer.gpu_address : 0) + tex->surface.dcc_offset; + (!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + tex->surface.meta_offset; if (sscreen->info.chip_class == GFX8) { meta_va += tex->surface.u.legacy.dcc_level[base_level].dcc_offset; @@ -340,11 +340,11 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture } unsigned dcc_tile_swizzle = tex->surface.tile_swizzle << 8; - dcc_tile_swizzle &= (1 << tex->surface.dcc_alignment_log2) - 1; + dcc_tile_swizzle &= (1 << tex->surface.meta_alignment_log2) - 1; meta_va |= dcc_tile_swizzle; } else if (vi_tc_compat_htile_enabled(tex, first_level, is_stencil ? PIPE_MASK_S : PIPE_MASK_Z)) { - meta_va = tex->buffer.gpu_address + tex->surface.htile_offset; + meta_va = tex->buffer.gpu_address + tex->surface.meta_offset; } if (meta_va) @@ -372,7 +372,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture .pipe_aligned = 1, }; - if (tex->surface.dcc_offset) + if (!tex->is_depth && tex->surface.meta_offset) meta = tex->surface.u.gfx9.dcc; state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) | @@ -411,7 +411,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture .pipe_aligned = 1, }; - if (tex->surface.dcc_offset) + if (!tex->is_depth && tex->surface.meta_offset) meta = tex->surface.u.gfx9.dcc; state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) | @@ -491,8 +491,11 @@ static void si_set_sampler_view_desc(struct si_context *sctx, struct si_sampler_ static bool color_needs_decompression(struct si_texture *tex) { + if (tex->is_depth) + return false; + return tex->surface.fmask_size || - (tex->dirty_level_mask && (tex->cmask_buffer || tex->surface.dcc_offset)); + (tex->dirty_level_mask && (tex->cmask_buffer || tex->surface.meta_offset)); } static bool depth_needs_decompression(struct si_texture *tex) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 78c956b338b..cd35cf5010c 100644 
--- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1657,7 +1657,7 @@ si_shader_selector_reference(struct si_context *sctx, /* sctx can optionally be static inline bool vi_dcc_enabled(struct si_texture *tex, unsigned level) { - return tex->surface.dcc_offset && level < tex->surface.num_dcc_levels; + return !tex->is_depth && tex->surface.meta_offset && level < tex->surface.num_meta_levels; } static inline unsigned si_tile_mode_index(struct si_texture *tex, unsigned level, bool stencil) @@ -1844,13 +1844,13 @@ static inline bool si_htile_enabled(struct si_texture *tex, unsigned level, unsi if (zs_mask == PIPE_MASK_S && tex->htile_stencil_disabled) return false; - return tex->surface.htile_offset && level < tex->surface.num_htile_levels; + return tex->is_depth && tex->surface.meta_offset && level < tex->surface.num_meta_levels; } static inline bool vi_tc_compat_htile_enabled(struct si_texture *tex, unsigned level, unsigned zs_mask) { - assert(!tex->tc_compatible_htile || tex->surface.htile_offset); + assert(!tex->tc_compatible_htile || tex->surface.meta_offset); return tex->tc_compatible_htile && si_htile_enabled(tex, level, zs_mask); } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index bf7d4aa937e..5f43a1409eb 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2492,7 +2492,7 @@ static void si_init_depth_surface(struct si_context *sctx, struct si_surface *su s_info |= S_02803C_TILE_STENCIL_DISABLE(1); } - surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.htile_offset) >> 8; + surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; surf->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); if (sctx->chip_class == GFX9) { @@ -2563,7 +2563,7 @@ static void si_init_depth_surface(struct si_context *sctx, struct si_surface *su s_info |= S_028044_ALLOW_EXPCLEAR(1); } - 
surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.htile_offset) >> 8; + surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; surf->db_htile_surface = S_028ABC_FULL_CACHE(1); } } @@ -3031,11 +3031,11 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb_color_info |= S_028C70_DCC_ENABLE(1); cb_dcc_base = - ((!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + tex->surface.dcc_offset) >> + ((!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + tex->surface.meta_offset) >> 8; unsigned dcc_tile_swizzle = tex->surface.tile_swizzle; - dcc_tile_swizzle &= ((1 << tex->surface.dcc_alignment_log2) - 1) >> 8; + dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8; cb_dcc_base |= dcc_tile_swizzle; } @@ -3086,7 +3086,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) .pipe_aligned = 1, }; - if (tex->surface.dcc_offset) + if (!tex->is_depth && tex->surface.meta_offset) meta = tex->surface.u.gfx9.dcc; /* Set mutable surface parameters. */ diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index d2bbd99f38e..511253e86ed 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -321,7 +321,8 @@ void si_texture_discard_cmask(struct si_screen *sscreen, struct si_texture *tex) static bool si_can_disable_dcc(struct si_texture *tex) { /* We can't disable DCC if it can be written by another process. 
*/ - return tex->surface.dcc_offset && + return !tex->is_depth && + tex->surface.meta_offset && (!tex->buffer.b.is_shared || !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)) && !ac_modifier_has_dcc(tex->surface.modifier); @@ -458,12 +459,11 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex else si_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer); - tex->surface.dcc_offset = new_tex->surface.dcc_offset; + tex->surface.meta_offset = new_tex->surface.meta_offset; tex->cb_color_info = new_tex->cb_color_info; memcpy(tex->color_clear_value, new_tex->color_clear_value, sizeof(tex->color_clear_value)); tex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode; - tex->surface.htile_offset = new_tex->surface.htile_offset; tex->depth_clear_value = new_tex->depth_clear_value; tex->dirty_level_mask = new_tex->dirty_level_mask; tex->stencil_dirty_level_mask = new_tex->stencil_dirty_level_mask; @@ -485,10 +485,9 @@ static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_tex si_resource_reference(&tex->dcc_retile_buffer, new_tex->dcc_retile_buffer); if (new_bind_flag == PIPE_BIND_LINEAR) { - assert(!tex->surface.htile_offset); + assert(!tex->surface.meta_offset); assert(!tex->cmask_buffer); assert(!tex->surface.fmask_size); - assert(!tex->surface.dcc_offset); assert(!tex->is_depth); } @@ -536,7 +535,7 @@ static bool si_displayable_dcc_needs_explicit_flush(struct si_texture *tex) if (ac_surface_get_nplanes(&tex->surface) > 1) return false; - return tex->surface.is_displayable && tex->surface.dcc_offset; + return tex->surface.is_displayable && tex->surface.meta_offset; } static bool si_resource_get_param(struct pipe_screen *screen, struct pipe_context *context, @@ -683,7 +682,7 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex * disable it for external clients that want write * access. 
*/ - if ((usage & PIPE_HANDLE_USAGE_SHADER_WRITE && tex->surface.dcc_offset) || + if ((usage & PIPE_HANDLE_USAGE_SHADER_WRITE && !tex->is_depth && tex->surface.meta_offset) || /* Displayable DCC requires an explicit flush. */ (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && si_displayable_dcc_needs_explicit_flush(tex))) { @@ -695,7 +694,7 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex } if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && - (tex->cmask_buffer || tex->surface.dcc_offset)) { + (tex->cmask_buffer || (!tex->is_depth && tex->surface.meta_offset))) { /* Eliminate fast clear (both CMASK and DCC) */ bool flushed; si_eliminate_fast_color_clear(sctx, tex, &flushed); @@ -814,7 +813,7 @@ void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex, tex->buffer.b.b.depth0, tex->buffer.b.b.array_size, tex->buffer.b.b.last_level, tex->buffer.b.b.nr_samples); - if (tex->surface.htile_offset) + if (tex->is_depth && tex->surface.meta_offset) u_log_printf(log, ", tc_compatible_htile=%u", tex->tc_compatible_htile); u_log_printf(log, ", %s\n", @@ -832,12 +831,12 @@ void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex, return; } - if (tex->surface.dcc_offset) { + if (!tex->is_depth && tex->surface.meta_offset) { for (i = 0; i <= tex->buffer.b.b.last_level; i++) u_log_printf(log, " DCCLevel[%i]: enabled=%u, offset=%u, " "fast_clear_size=%u\n", - i, i < tex->surface.num_dcc_levels, tex->surface.u.legacy.dcc_level[i].dcc_offset, + i, i < tex->surface.num_meta_levels, tex->surface.u.legacy.dcc_level[i].dcc_offset, tex->surface.u.legacy.dcc_level[i].dcc_fast_clear_size); } @@ -1030,47 +1029,47 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, tex->surface.cmask_offset, tex->surface.cmask_size, 0xCCCCCCCC); } - if (tex->surface.htile_offset) { + if (tex->is_depth && tex->surface.meta_offset) { uint32_t clear_value = 0; if (sscreen->info.chip_class >= GFX9 || 
tex->tc_compatible_htile) clear_value = 0x0000030F; assert(num_clears < ARRAY_SIZE(clears)); - si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.htile_offset, - tex->surface.htile_size, clear_value); + si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, + tex->surface.meta_size, clear_value); } /* Initialize DCC only if the texture is not being imported. */ - if (!(surface->flags & RADEON_SURF_IMPORTED) && tex->surface.dcc_offset) { + if (!(surface->flags & RADEON_SURF_IMPORTED) && !tex->is_depth && tex->surface.meta_offset) { /* Clear DCC to black for all tiles with DCC enabled. * * This fixes corruption in 3DMark Slingshot Extreme, which * uses uninitialized textures, causing corruption. */ - if (tex->surface.num_dcc_levels == tex->buffer.b.b.last_level + 1 && + if (tex->surface.num_meta_levels == tex->buffer.b.b.last_level + 1 && tex->buffer.b.b.nr_samples <= 2) { /* Simple case - all tiles have DCC enabled. */ assert(num_clears < ARRAY_SIZE(clears)); - si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.dcc_offset, - tex->surface.dcc_size, DCC_CLEAR_COLOR_0000); + si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, + tex->surface.meta_size, DCC_CLEAR_COLOR_0000); } else if (sscreen->info.chip_class >= GFX9) { /* Clear to uncompressed. Clearing this to black is complicated. */ assert(num_clears < ARRAY_SIZE(clears)); - si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.dcc_offset, - tex->surface.dcc_size, DCC_UNCOMPRESSED); + si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, + tex->surface.meta_size, DCC_UNCOMPRESSED); } else { /* GFX8: Initialize mipmap levels and multisamples separately. */ if (tex->buffer.b.b.nr_samples >= 2) { /* Clearing this to black is complicated. 
*/ assert(num_clears < ARRAY_SIZE(clears)); - si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.dcc_offset, - tex->surface.dcc_size, DCC_UNCOMPRESSED); + si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, + tex->surface.meta_size, DCC_UNCOMPRESSED); } else { /* Clear the enabled mipmap levels to black. */ unsigned size = 0; - for (unsigned i = 0; i < tex->surface.num_dcc_levels; i++) { + for (unsigned i = 0; i < tex->surface.num_meta_levels; i++) { if (!tex->surface.u.legacy.dcc_level[i].dcc_fast_clear_size) break; @@ -1081,14 +1080,14 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, /* Mipmap levels with DCC. */ if (size) { assert(num_clears < ARRAY_SIZE(clears)); - si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.dcc_offset, size, + si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, size, DCC_CLEAR_COLOR_0000); } /* Mipmap levels without DCC. 
*/ - if (size != tex->surface.dcc_size) { + if (size != tex->surface.meta_size) { assert(num_clears < ARRAY_SIZE(clears)); - si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.dcc_offset + size, - tex->surface.dcc_size - size, DCC_UNCOMPRESSED); + si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset + size, + tex->surface.meta_size - size, DCC_UNCOMPRESSED); } } } @@ -2334,13 +2333,14 @@ void vi_separate_dcc_try_enable(struct si_context *sctx, struct si_texture *tex) if (!tex->buffer.b.is_shared || !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) || tex->buffer.b.b.target != PIPE_TEXTURE_2D || tex->buffer.b.b.last_level > 0 || - !tex->surface.dcc_size || sctx->screen->debug_flags & DBG(NO_DCC) || + !tex->surface.meta_size || sctx->screen->debug_flags & DBG(NO_DCC) || sctx->screen->debug_flags & DBG(NO_DCC_FB)) return; assert(sctx->chip_class >= GFX8); + assert(!tex->is_depth); - if (tex->surface.dcc_offset) + if (tex->surface.meta_offset) return; /* already enabled */ /* Enable the DCC stat gathering. */ @@ -2352,7 +2352,7 @@ void vi_separate_dcc_try_enable(struct si_context *sctx, struct si_texture *tex) if (!vi_should_enable_separate_dcc(tex)) return; /* stats show that DCC decompression is too expensive */ - assert(tex->surface.num_dcc_levels); + assert(tex->surface.num_meta_levels); assert(!tex->dcc_separate_buffer); si_texture_discard_cmask(sctx->screen, tex); @@ -2366,13 +2366,13 @@ void vi_separate_dcc_try_enable(struct si_context *sctx, struct si_texture *tex) } else { tex->dcc_separate_buffer = si_aligned_buffer_create(sctx->b.screen, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, - tex->surface.dcc_size, 1 << tex->surface.dcc_alignment_log2); + tex->surface.meta_size, 1 << tex->surface.meta_alignment_log2); if (!tex->dcc_separate_buffer) return; } /* dcc_offset is the absolute GPUVM address. 
*/ - tex->surface.dcc_offset = tex->dcc_separate_buffer->gpu_address; + tex->surface.meta_offset = tex->dcc_separate_buffer->gpu_address; /* no need to flag anything since this is called by fast clear that * flags framebuffer state @@ -2427,7 +2427,7 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, struct si assert(!tex->last_dcc_separate_buffer); tex->last_dcc_separate_buffer = tex->dcc_separate_buffer; tex->dcc_separate_buffer = NULL; - tex->surface.dcc_offset = 0; + tex->surface.meta_offset = 0; /* no need to flag anything since this is called after * decompression that re-sets framebuffer state */ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c index 2ce9696c90e..405e414a02f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c @@ -287,7 +287,7 @@ static void si_compute_htile(const struct radeon_info *info, unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; unsigned num_pipes = info->num_tile_pipes; - surf->htile_size = 0; + surf->meta_size = 0; if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) || surf->flags & RADEON_SURF_NO_HTILE) @@ -342,8 +342,8 @@ static void si_compute_htile(const struct radeon_info *info, pipe_interleave_bytes = info->pipe_interleave_bytes; base_align = num_pipes * pipe_interleave_bytes; - surf->htile_alignment_log2 = util_logbase2(base_align); - surf->htile_size = num_layers * align(slice_bytes, base_align); + surf->meta_alignment_log2 = util_logbase2(base_align); + surf->meta_size = num_layers * align(slice_bytes, base_align); } static int radeon_winsys_surface_init(struct radeon_winsys *rws, @@ -438,9 +438,9 @@ static int radeon_winsys_surface_init(struct radeon_winsys *rws, /* Determine the memory layout of multiple allocations in one buffer. 
*/ surf_ws->total_size = surf_ws->surf_size; - if (surf_ws->htile_size) { - surf_ws->htile_offset = align64(surf_ws->total_size, 1 << surf_ws->htile_alignment_log2); - surf_ws->total_size = surf_ws->htile_offset + surf_ws->htile_size; + if (surf_ws->meta_size) { + surf_ws->meta_offset = align64(surf_ws->total_size, 1 << surf_ws->meta_alignment_log2); + surf_ws->total_size = surf_ws->meta_offset + surf_ws->meta_size; } if (surf_ws->fmask_size) {