ac: unify HTILE codes and encoding

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39631>
This commit is contained in:
Marek Olšák 2026-01-30 16:33:24 -05:00 committed by Marge Bot
parent e0c7c642f4
commit 6f36a2be2e
5 changed files with 143 additions and 133 deletions

View file

@ -67,6 +67,100 @@ enum {
CMASK_8xMSAA_FMASK_UNCOMPRESSED_COLOR_EXPANDED = CMASK_MSAA_CODE(3, 3),
};
/* One 32-bit HTILE entry, accessible as a raw dword or through one of three bitfield views
 * (Z only, Z + stencil, Z + stencil + VRS). Fields are declared from the least significant
 * bit upwards.
 *
 * NOTE(review): how declaration order maps to bit positions is implementation-defined in C.
 * The bit ranges noted below assume LSB-first allocation - confirm for every supported
 * compiler/target.
 */
typedef union {
/* Z only */
struct {
unsigned zmask : 4; /* bits 3:0 */
unsigned minz : 14; /* bits 17:4, 14-bit value (0x3FFF is the maximum) */
unsigned maxz : 14; /* bits 31:18, 14-bit value (0x3FFF is the maximum) */
} z;
/* Z + stencil */
struct {
unsigned zmask : 4; /* bits 3:0 */
/* SR0/SR1 contain stencil pretest results. */
unsigned sr0 : 2; /* bits 5:4 */
unsigned sr1 : 2; /* bits 7:6 */
unsigned smem : 2; /* bits 9:8 */
unsigned unused : 2; /* bits 11:10 */
/* The Z Range consists of a 6-bit delta and 14-bit base.
* ZRANGE_PRECISION determines whether zbase means minZ or maxZ.
*/
unsigned zdelta : 6; /* bits 17:12 */
unsigned zbase : 14; /* bits 31:18 */
} zs;
/* Z + VRS. VRS fields are 0-based: (0, 0) means VRS 1x1. */
/* Same widths as the Z + stencil view, except that vrs_x sits where sr1 is and vrs_y sits
 * where the unused bits are.
 */
struct {
unsigned zmask : 4; /* bits 3:0 */
unsigned sr0 : 2; /* bits 5:4 */
unsigned vrs_x : 2; /* bits 7:6 */
unsigned smem : 2; /* bits 9:8 */
unsigned vrs_y : 2; /* bits 11:10 */
unsigned zdelta : 6; /* bits 17:12 */
unsigned zbase : 14; /* bits 31:18 */
} zs_vrs;
/* Raw view of the whole entry. */
uint32_t dword;
} ac_htile_dword;
/* Build a raw HTILE dword from designated initializers of one ac_htile_dword view, e.g.
 * HTILE_ZS_CODE(.zmask = 0xF, .smem = 0x3). Fields that are not named are zero.
 */
#define HTILE_Z_CODE(...)      (((ac_htile_dword){ .z = { __VA_ARGS__ } }).dword)
#define HTILE_ZS_CODE(...)     (((ac_htile_dword){ .zs = { __VA_ARGS__ } }).dword)
#define HTILE_ZS_VRS_CODE(...) (((ac_htile_dword){ .zs_vrs = { __VA_ARGS__ } }).dword)
/* Fast-clear HTILE codes for a clear depth in [0, 1].
 *
 * These only reset HiZ to the clear depth and mark the Z/S state as "cleared"; the full clear
 * values are held by the DB registers.
 *
 * The depth argument can be expanded more than once, so it must not have side effects.
 */
#define HTILE_DEPTH_TO_UNORM14(depth) lroundf((depth) * 0x3FFF)

#define HTILE_Z_CLEAR_REG(depth) \
   HTILE_Z_CODE(.zmask = 0, \
                .minz = HTILE_DEPTH_TO_UNORM14(depth), \
                .maxz = HTILE_DEPTH_TO_UNORM14(depth))

#define HTILE_ZS_CLEAR_REG(depth) \
   HTILE_ZS_CODE(.zmask = 0, \
                 .sr0 = 0x3, \
                 .sr1 = 0x3, \
                 .smem = 0, \
                 .zdelta = 0, \
                 .zbase = HTILE_DEPTH_TO_UNORM14(depth))

#define HTILE_ZS_VRS_CLEAR_REG(depth) \
   HTILE_ZS_VRS_CODE(.zmask = 0, \
                     .sr0 = 0x3, \
                     .vrs_x = 0, /* VRS = 1x1 (0-based) */ \
                     .smem = 0, \
                     .vrs_y = 0, \
                     .zdelta = 0, \
                     .zbase = HTILE_DEPTH_TO_UNORM14(depth))
/* Z only: Zmask says "Z uncompressed" and the Z bounds span the whole range
 * (minZ = 0, maxZ = 1).
 */
#define HTILE_Z_UNCOMPRESSED \
   HTILE_Z_CODE(.zmask = 0xF, .minz = 0, .maxz = 0x3FFF)

/* Z + stencil: Zmask says "Z uncompressed", Smem says "stencil uncompressed", the SR0/SR1
 * stencil pretest results are unknown, and the Z range is [0, 1].
 */
#define HTILE_ZS_UNCOMPRESSED \
   HTILE_ZS_CODE(.zmask = 0xF, \
                 .sr0 = 0x3, \
                 .sr1 = 0x3, \
                 .smem = 0x3, \
                 .zdelta = 0x3F, \
                 .zbase = 0x3FFF)

/* Z + stencil + VRS: same as above with only SR0 available, plus a VRS rate of 1x1
 * (the VRS fields are 0-based).
 */
#define HTILE_ZS_VRS_UNCOMPRESSED \
   HTILE_ZS_VRS_CODE(.zmask = 0xF, \
                     .sr0 = 0x3, \
                     .vrs_x = 0, /* VRS = 1x1 (0-based) */ \
                     .smem = 0x3, \
                     .vrs_y = 0, \
                     .zdelta = 0x3F, \
                     .zbase = 0x3FFF)
unsigned
ac_map_swizzle(unsigned swizzle);

View file

@ -679,47 +679,14 @@ static uint32_t
/* Returns the HTILE dword for a fast depth clear to value.depth. The encoding is chosen per
 * image: Z only when radv_image_tile_stencil_disabled() is true, otherwise Z + stencil, or
 * Z + stencil + VRS when radv_image_has_vrs_htile() is true.
 *
 * NOTE(review): this hunk is shown without +/- markers, so two implementations are
 * interleaved below. The open-coded shifts and masks into htile_value (and the final
 * "return htile_value") appear to be the code being removed, and the HTILE_*_CLEAR_REG
 * returns appear to be its replacement - confirm against the repository before reading this
 * as a single function body.
 */
radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
VkClearDepthStencilValue value)
{
uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */
uint32_t zmask = 0, smem = 0;
uint32_t htile_value;
uint32_t zmin, zmax;
/* Convert the depth value to 14-bit zmin/zmax values. */
zmin = lroundf(value.depth * max_zval);
zmax = zmin;
if (radv_image_tile_stencil_disabled(device, image)) {
/* Z only (no stencil):
*
* |31 18|17 4|3 0|
* +---------+---------+-------+
* | Max Z | Min Z | ZMask |
*/
htile_value = (((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0));
return HTILE_Z_CLEAR_REG(value.depth);
} else {
/* Z and stencil:
*
* |31 12|11 10|9 8|7 6|5 4|3 0|
* +-----------+-----+------+-----+-----+-------+
* | Z Range | | SMem | SR1 | SR0 | ZMask |
*
* Z, stencil, 4 bit VRS encoding:
* |31 12| 11 10 |9 8|7 6 |5 4|3 0|
* +-----------+------------+------+------------+-----+-------+
* | Z Range | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |
*/
/* Z Range = 14-bit base in the upper bits, 6-bit delta in the lower bits. */
uint32_t delta = 0;
uint32_t zrange = ((zmax << 6) | delta);
uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */
/* With VRS in HTILE only SR0 is set; bits 7:6 hold the VRS X-rate instead of SR1. */
if (radv_image_has_vrs_htile(device, image))
sresults = 0x3;
htile_value = (((zrange & 0xfffff) << 12) | ((smem & 0x3) << 8) | ((sresults & 0xf) << 4) | ((zmask & 0xf) << 0));
return HTILE_ZS_VRS_CLEAR_REG(value.depth);
else
return HTILE_ZS_CLEAR_REG(value.depth);
}
return htile_value;
}
static uint32_t
@ -727,32 +694,52 @@ radv_get_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image
bool is_clear)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
uint32_t mask = 0;
if (radv_image_tile_stencil_disabled(device, image)) {
/* All the HTILE buffer is used when there is no stencil. */
mask = UINT32_MAX;
return UINT32_MAX;
} else {
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
mask |= 0xfffffc0f;
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
mask |= 0x000003f0;
/* Preserve VRS rates during clears but not during initialization. */
if (is_clear && radv_image_has_vrs_htile(device, image)) {
mask &= ~(0x3 << 6); /* VRS X-rate */
mask &= ~(0x3 << 10); /* VRS Y-rate */
}
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
/* Clear both aspects on SDMA, it's not ideal but there is no other way to initialize the
* HTILE buffer.
*/
mask = UINT32_MAX;
return UINT32_MAX;
}
}
return mask;
uint32_t mask = 0;
if (radv_image_has_vrs_htile(device, image)) {
/* Preserve VRS rates during clears but not during initialization. */
uint32_t vrs_mask = is_clear ? 0 : 0x3;
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
mask |= HTILE_ZS_VRS_CODE(.zmask = 0xF,
.zdelta = 0x3F,
.zbase = 0x3FFF,
.vrs_x = vrs_mask,
.vrs_y = vrs_mask);
}
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
mask |= HTILE_ZS_VRS_CODE(.sr0 = 0x3,
.smem = 0x3,
.vrs_x = vrs_mask,
.vrs_y = vrs_mask);
}
} else {
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
mask |= HTILE_ZS_CODE(.zmask = 0xF,
.zdelta = 0x3F,
.zbase = 0x3FFF);
}
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
mask |= HTILE_ZS_CODE(.sr0 = 0x3,
.sr1 = 0x3,
.smem = 0x3);
}
}
return mask;
}
}
static bool

View file

@ -296,40 +296,15 @@ radv_get_hiz_valid_va(const struct radv_image *image, uint32_t base_level)
/* Returns the dword used as the initial HTILE value for an image: the "uncompressed" code of
 * the Z only, Z + stencil or Z + stencil + VRS encoding, selected by
 * radv_image_tile_stencil_disabled() and radv_image_has_vrs_htile().
 *
 * NOTE(review): this hunk is shown without +/- markers, so two implementations are
 * interleaved below. The assignments of hex literals to initial_value (and the final
 * "return initial_value") appear to be the code being removed, and the HTILE_*_UNCOMPRESSED
 * returns appear to be its replacement - confirm against the repository.
 */
static inline uint32_t
radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
{
uint32_t initial_value;
if (radv_image_tile_stencil_disabled(device, image)) {
/* Z only (no stencil):
*
* |31 18|17 4|3 0|
* +---------+---------+-------+
* | Max Z | Min Z | ZMask |
*/
/* ZMask = 0xF, Min Z = 0, Max Z = 0x3FFF. */
initial_value = 0xfffc000f;
return HTILE_Z_UNCOMPRESSED;
} else {
/* Z and stencil:
*
* |31 12|11 10|9 8|7 6|5 4|3 0|
* +-----------+-----+------+-----+-----+-------+
* | Z Range | | SMem | SR1 | SR0 | ZMask |
*
* SR0/SR1 contains the stencil test results. Initializing
* SR0/SR1 to 0x3 means the stencil test result is unknown.
*
* Z, stencil and 4 bit VRS encoding:
* |31 12|11 10|9 8|7 6|5 4|3 0|
* +-----------+------------+------+------------+-----+-------+
* | Z Range | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
*/
if (radv_image_has_vrs_htile(device, image)) {
/* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */
initial_value = 0xfffff33f;
return HTILE_ZS_VRS_UNCOMPRESSED;
} else {
/* Every field except the unused bits 11:10 is set to its maximum. */
initial_value = 0xfffff3ff;
return HTILE_ZS_UNCOMPRESSED;
}
}
return initial_value;
}
static inline uint32_t

View file

@ -573,52 +573,10 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_
/* Returns the HTILE dword for a fast depth clear to `depth`: the Z only encoding when
 * tex->htile_stencil_disabled is set, otherwise the Z + stencil encoding.
 *
 * NOTE(review): this hunk is shown without +/- markers, so two implementations are
 * interleaved below. The open-coded computation inside the first if/else appears to be the
 * code being removed, and the trailing if/else returning HTILE_Z_CLEAR_REG /
 * HTILE_ZS_CLEAR_REG appears to be its replacement - confirm against the repository.
 */
static uint32_t si_get_htile_clear_value(struct si_texture *tex, float depth)
{
/* Maximum 14-bit UINT value. */
const uint32_t max_z_value = 0x3FFF;
/* For clears, Zmask and Smem will always be set to zero. */
const uint32_t zmask = 0;
const uint32_t smem = 0;
/* Convert depthValue to 14-bit zmin/zmax uint values. */
const uint32_t zmin = lroundf(depth * max_z_value);
const uint32_t zmax = zmin;
if (tex->htile_stencil_disabled) {
/* Z-only HTILE is laid out as follows:
* |31 18|17 4|3 0|
* +---------+---------+-------+
* | Max Z | Min Z | ZMask |
*/
return ((zmax & 0x3FFF) << 18) |
((zmin & 0x3FFF) << 4) |
((zmask & 0xF) << 0);
} else {
/* Z+S HTILE is laid out as-follows:
* |31 12|11 10|9 8|7 6|5 4|3 0|
* +-----------+-----+------+-----+-----+-------+
* | Z Range | | SMem | SR1 | SR0 | ZMask |
*
* The base value for zRange is either zMax or zMin, depending on ZRANGE_PRECISION.
* For a fast clear, zMin == zMax == clearValue. This means that the base will
* always be the clear value (converted to 14-bit UINT).
*
* When abs(zMax-zMin) < 16, the delta is equal to the difference. In the case of
* fast clears, where zMax == zMin, the delta is always zero.
*/
const uint32_t delta = 0;
const uint32_t zrange = (zmax << 6) | delta;
/* SResults 0 & 1 are set based on the stencil compare state.
* For fast-clear, the default value of sr0 and sr1 are both 0x3.
*/
const uint32_t sresults = 0xf;
return ((zrange & 0xFFFFF) << 12) |
((smem & 0x3) << 8) |
((sresults & 0xF) << 4) |
((zmask & 0xF) << 0);
}
if (tex->htile_stencil_disabled)
return HTILE_Z_CLEAR_REG(depth);
else
return HTILE_ZS_CLEAR_REG(depth);
}
static bool si_can_fast_clear_depth(struct si_texture *zstex, unsigned level, float depth,

View file

@ -1264,14 +1264,10 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
CMASK_MSAA_FMASK_CLEAR_0_COLOR_EXPANDED);
}
if (tex->is_depth && tex->surface.meta_offset) {
uint32_t clear_value = 0;
if (sscreen->info.gfx_level >= GFX9 || tex->tc_compatible_htile)
clear_value = 0x0000030F;
assert(num_clears < ARRAY_SIZE(clears));
si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
tex->surface.meta_size, clear_value);
tex->surface.meta_size,
tex->htile_stencil_disabled ? HTILE_Z_UNCOMPRESSED : HTILE_ZS_UNCOMPRESSED);
}
/* Initialize DCC only if the texture is not being imported. */