mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-25 00:20:34 +01:00
ac: unify HTILE codes and encoding
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39631>
This commit is contained in:
parent
e0c7c642f4
commit
6f36a2be2e
5 changed files with 143 additions and 133 deletions
|
|
@ -67,6 +67,100 @@ enum {
|
|||
CMASK_8xMSAA_FMASK_UNCOMPRESSED_COLOR_EXPANDED = CMASK_MSAA_CODE(3, 3),
|
||||
};
|
||||
|
||||
/* One 32-bit HTILE entry, seen through each of its encodings.
 *
 * All members alias the same dword. Within each view the fields are declared from
 * bit 0 upwards and add up to exactly 32 bits, so the declaration order IS the bit
 * layout and must not be changed. Reading "dword" after filling a bitfield view
 * depends on the compiler allocating bitfields starting at the least significant
 * bit, which is implementation-defined in C.
 */
typedef union {
   /* Z only:
    *
    * |31     18|17      4|3     0|
    * +---------+---------+-------+
    * |  Max Z  |  Min Z  | ZMask |
    */
   struct {
      unsigned zmask : 4;
      unsigned minz : 14;
      unsigned maxz : 14;
   } z;

   /* Z + stencil:
    *
    * |31       12|11 10|9    8|7   6|5   4|3     0|
    * +-----------+-----+------+-----+-----+-------+
    * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
    */
   struct {
      unsigned zmask : 4;
      /* SR0/SR1 contain stencil pretest results. */
      unsigned sr0 : 2;
      unsigned sr1 : 2;
      unsigned smem : 2;
      unsigned unused : 2;
      /* The Z Range consists of a 6-bit delta and 14-bit base.
       * ZRANGE_PRECISION determines whether zbase means minZ or maxZ.
       */
      unsigned zdelta : 6;
      unsigned zbase : 14;
   } zs;

   /* Z + stencil + VRS. VRS fields are 0-based: (0, 0) means VRS 1x1.
    *
    * |31       12|11        10|9    8|7          6|5   4|3     0|
    * +-----------+------------+------+------------+-----+-------+
    * |  Z Range  | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |
    *
    * Compared to the plain Z + stencil view, vrs_x takes the place of sr1 and
    * vrs_y takes the place of the unused bits.
    */
   struct {
      unsigned zmask : 4;
      unsigned sr0 : 2;
      unsigned vrs_x : 2;
      unsigned smem : 2;
      unsigned vrs_y : 2;
      unsigned zdelta : 6;
      unsigned zbase : 14;
   } zs_vrs;

   /* The raw value. */
   uint32_t dword;
} ac_htile_dword;
|
||||
|
||||
/* Build a raw 32-bit HTILE value from designated initializers for one view of
 * ac_htile_dword, e.g. HTILE_ZS_CODE(.zmask = 0xF, .smem = 0x3).
 *
 * Fields that are not named are zero-initialized. The whole expansion is
 * parenthesized so the result can be dropped into any expression.
 */
#define HTILE_Z_CODE(...)      (((ac_htile_dword){.z = {__VA_ARGS__}}).dword)
#define HTILE_ZS_CODE(...)     (((ac_htile_dword){.zs = {__VA_ARGS__}}).dword)
#define HTILE_ZS_VRS_CODE(...) (((ac_htile_dword){.zs_vrs = {__VA_ARGS__}}).dword)
|
||||
|
||||
/* HTILE codes for fast clears.
 *
 * depth must be in [0, 1]; it is scaled to a 14-bit value and rounded with
 * lroundf(), so <math.h> has to be visible wherever these macros are expanded.
 * These codes only clear HiZ and set the Z/S state to "cleared". The DB
 * registers contain the full clear values.
 *
 * In every variant ZMask and SMem are zero and the Z range collapses to the clear
 * value (minz == maxz, or zdelta == 0). The stencil pretest results that exist in
 * the encoding are set to 0x3.
 *
 * Note that HTILE_Z_CLEAR_REG expands "depth" twice; pass an expression without
 * side effects.
 */
#define HTILE_Z_CLEAR_REG(depth) HTILE_Z_CODE( \
   .zmask = 0, \
   .minz = lroundf((depth) * 0x3FFF), \
   .maxz = lroundf((depth) * 0x3FFF))

#define HTILE_ZS_CLEAR_REG(depth) HTILE_ZS_CODE( \
   .zmask = 0, \
   .sr0 = 0x3, \
   .sr1 = 0x3, \
   .smem = 0, \
   .zdelta = 0, \
   .zbase = lroundf((depth) * 0x3FFF))

#define HTILE_ZS_VRS_CLEAR_REG(depth) HTILE_ZS_VRS_CODE( \
   .zmask = 0, \
   .sr0 = 0x3, \
   .vrs_x = 0, /* VRS = 1x1 (0-based) */ \
   .smem = 0, \
   .vrs_y = 0, \
   .zdelta = 0, \
   .zbase = lroundf((depth) * 0x3FFF))
|
||||
|
||||
/* HTILE codes describing fully uncompressed tiles with the widest possible Z range. */

/* Zmask = Z uncompressed, minZ = 0, maxZ = 1. */
#define HTILE_Z_UNCOMPRESSED HTILE_Z_CODE( \
   .zmask = 0xF, \
   .minz = 0, \
   .maxz = 0x3FFF)

/* Zmask = Z uncompressed, SR0/SR1 = Stencil pretest is unknown, Smem = Stencil uncompressed,
 * ZRange = [0, 1].
 */
#define HTILE_ZS_UNCOMPRESSED HTILE_ZS_CODE( \
   .zmask = 0xF, \
   .sr0 = 0x3, \
   .sr1 = 0x3, \
   .smem = 0x3, \
   .zdelta = 0x3F, \
   .zbase = 0x3FFF)

/* Zmask = Z uncompressed, SR0 = Stencil pretest is unknown, Smem = Stencil uncompressed,
 * ZRange = [0, 1], VRS = 1x1 (0-based).
 */
#define HTILE_ZS_VRS_UNCOMPRESSED HTILE_ZS_VRS_CODE( \
   .zmask = 0xF, \
   .sr0 = 0x3, \
   .vrs_x = 0, /* VRS = 1x1 (0-based) */ \
   .smem = 0x3, \
   .vrs_y = 0, \
   .zdelta = 0x3F, \
   .zbase = 0x3FFF)
|
||||
|
||||
unsigned
|
||||
ac_map_swizzle(unsigned swizzle);
|
||||
|
||||
|
|
|
|||
|
|
@ -679,47 +679,14 @@ static uint32_t
|
|||
radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
|
||||
VkClearDepthStencilValue value)
|
||||
{
|
||||
uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */
|
||||
uint32_t zmask = 0, smem = 0;
|
||||
uint32_t htile_value;
|
||||
uint32_t zmin, zmax;
|
||||
|
||||
/* Convert the depth value to 14-bit zmin/zmax values. */
|
||||
zmin = lroundf(value.depth * max_zval);
|
||||
zmax = zmin;
|
||||
|
||||
if (radv_image_tile_stencil_disabled(device, image)) {
|
||||
/* Z only (no stencil):
|
||||
*
|
||||
* |31 18|17 4|3 0|
|
||||
* +---------+---------+-------+
|
||||
* | Max Z | Min Z | ZMask |
|
||||
*/
|
||||
htile_value = (((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0));
|
||||
return HTILE_Z_CLEAR_REG(value.depth);
|
||||
} else {
|
||||
|
||||
/* Z and stencil:
|
||||
*
|
||||
* |31 12|11 10|9 8|7 6|5 4|3 0|
|
||||
* +-----------+-----+------+-----+-----+-------+
|
||||
* | Z Range | | SMem | SR1 | SR0 | ZMask |
|
||||
*
|
||||
* Z, stencil, 4 bit VRS encoding:
|
||||
* |31 12| 11 10 |9 8|7 6 |5 4|3 0|
|
||||
* +-----------+------------+------+------------+-----+-------+
|
||||
* | Z Range | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |
|
||||
*/
|
||||
uint32_t delta = 0;
|
||||
uint32_t zrange = ((zmax << 6) | delta);
|
||||
uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */
|
||||
|
||||
if (radv_image_has_vrs_htile(device, image))
|
||||
sresults = 0x3;
|
||||
|
||||
htile_value = (((zrange & 0xfffff) << 12) | ((smem & 0x3) << 8) | ((sresults & 0xf) << 4) | ((zmask & 0xf) << 0));
|
||||
return HTILE_ZS_VRS_CLEAR_REG(value.depth);
|
||||
else
|
||||
return HTILE_ZS_CLEAR_REG(value.depth);
|
||||
}
|
||||
|
||||
return htile_value;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
@ -727,32 +694,52 @@ radv_get_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image
|
|||
bool is_clear)
|
||||
{
|
||||
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
uint32_t mask = 0;
|
||||
|
||||
if (radv_image_tile_stencil_disabled(device, image)) {
|
||||
/* All the HTILE buffer is used when there is no stencil. */
|
||||
mask = UINT32_MAX;
|
||||
return UINT32_MAX;
|
||||
} else {
|
||||
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
mask |= 0xfffffc0f;
|
||||
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
mask |= 0x000003f0;
|
||||
|
||||
/* Preserve VRS rates during clears but not during initialization. */
|
||||
if (is_clear && radv_image_has_vrs_htile(device, image)) {
|
||||
mask &= ~(0x3 << 6); /* VRS X-rate */
|
||||
mask &= ~(0x3 << 10); /* VRS Y-rate */
|
||||
}
|
||||
|
||||
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
|
||||
/* Clear both aspects on SDMA, it's not ideal but there is no other way to initialize the
|
||||
* HTILE buffer.
|
||||
*/
|
||||
mask = UINT32_MAX;
|
||||
return UINT32_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
return mask;
|
||||
uint32_t mask = 0;
|
||||
|
||||
if (radv_image_has_vrs_htile(device, image)) {
|
||||
/* Preserve VRS rates during clears but not during initialization. */
|
||||
uint32_t vrs_mask = is_clear ? 0 : 0x3;
|
||||
|
||||
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||||
mask |= HTILE_ZS_VRS_CODE(.zmask = 0xF,
|
||||
.zdelta = 0x3F,
|
||||
.zbase = 0x3FFF,
|
||||
.vrs_x = vrs_mask,
|
||||
.vrs_y = vrs_mask);
|
||||
}
|
||||
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
mask |= HTILE_ZS_VRS_CODE(.sr0 = 0x3,
|
||||
.smem = 0x3,
|
||||
.vrs_x = vrs_mask,
|
||||
.vrs_y = vrs_mask);
|
||||
}
|
||||
} else {
|
||||
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||||
mask |= HTILE_ZS_CODE(.zmask = 0xF,
|
||||
.zdelta = 0x3F,
|
||||
.zbase = 0x3FFF);
|
||||
}
|
||||
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
mask |= HTILE_ZS_CODE(.sr0 = 0x3,
|
||||
.sr1 = 0x3,
|
||||
.smem = 0x3);
|
||||
}
|
||||
}
|
||||
|
||||
return mask;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
|
|||
|
|
@ -296,40 +296,15 @@ radv_get_hiz_valid_va(const struct radv_image *image, uint32_t base_level)
|
|||
static inline uint32_t
|
||||
radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
|
||||
{
|
||||
uint32_t initial_value;
|
||||
|
||||
if (radv_image_tile_stencil_disabled(device, image)) {
|
||||
/* Z only (no stencil):
|
||||
*
|
||||
* |31 18|17 4|3 0|
|
||||
* +---------+---------+-------+
|
||||
* | Max Z | Min Z | ZMask |
|
||||
*/
|
||||
initial_value = 0xfffc000f;
|
||||
return HTILE_Z_UNCOMPRESSED;
|
||||
} else {
|
||||
/* Z and stencil:
|
||||
*
|
||||
* |31 12|11 10|9 8|7 6|5 4|3 0|
|
||||
* +-----------+-----+------+-----+-----+-------+
|
||||
* | Z Range | | SMem | SR1 | SR0 | ZMask |
|
||||
*
|
||||
* SR0/SR1 contains the stencil test results. Initializing
|
||||
* SR0/SR1 to 0x3 means the stencil test result is unknown.
|
||||
*
|
||||
* Z, stencil and 4 bit VRS encoding:
|
||||
* |31 12|11 10|9 8|7 6|5 4|3 0|
|
||||
* +-----------+------------+------+------------+-----+-------+
|
||||
* | Z Range | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask |
|
||||
*/
|
||||
if (radv_image_has_vrs_htile(device, image)) {
|
||||
/* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */
|
||||
initial_value = 0xfffff33f;
|
||||
return HTILE_ZS_VRS_UNCOMPRESSED;
|
||||
} else {
|
||||
initial_value = 0xfffff3ff;
|
||||
return HTILE_ZS_UNCOMPRESSED;
|
||||
}
|
||||
}
|
||||
|
||||
return initial_value;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
|
|
|
|||
|
|
@ -573,52 +573,10 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_
|
|||
|
||||
static uint32_t si_get_htile_clear_value(struct si_texture *tex, float depth)
|
||||
{
|
||||
/* Maximum 14-bit UINT value. */
|
||||
const uint32_t max_z_value = 0x3FFF;
|
||||
|
||||
/* For clears, Zmask and Smem will always be set to zero. */
|
||||
const uint32_t zmask = 0;
|
||||
const uint32_t smem = 0;
|
||||
|
||||
/* Convert depthValue to 14-bit zmin/zmax uint values. */
|
||||
const uint32_t zmin = lroundf(depth * max_z_value);
|
||||
const uint32_t zmax = zmin;
|
||||
|
||||
if (tex->htile_stencil_disabled) {
|
||||
/* Z-only HTILE is laid out as follows:
|
||||
* |31 18|17 4|3 0|
|
||||
* +---------+---------+-------+
|
||||
* | Max Z | Min Z | ZMask |
|
||||
*/
|
||||
return ((zmax & 0x3FFF) << 18) |
|
||||
((zmin & 0x3FFF) << 4) |
|
||||
((zmask & 0xF) << 0);
|
||||
} else {
|
||||
/* Z+S HTILE is laid out as-follows:
|
||||
* |31 12|11 10|9 8|7 6|5 4|3 0|
|
||||
* +-----------+-----+------+-----+-----+-------+
|
||||
* | Z Range | | SMem | SR1 | SR0 | ZMask |
|
||||
*
|
||||
* The base value for zRange is either zMax or zMin, depending on ZRANGE_PRECISION.
|
||||
* For a fast clear, zMin == zMax == clearValue. This means that the base will
|
||||
* always be the clear value (converted to 14-bit UINT).
|
||||
*
|
||||
* When abs(zMax-zMin) < 16, the delta is equal to the difference. In the case of
|
||||
* fast clears, where zMax == zMin, the delta is always zero.
|
||||
*/
|
||||
const uint32_t delta = 0;
|
||||
const uint32_t zrange = (zmax << 6) | delta;
|
||||
|
||||
/* SResults 0 & 1 are set based on the stencil compare state.
|
||||
* For fast-clear, the default value of sr0 and sr1 are both 0x3.
|
||||
*/
|
||||
const uint32_t sresults = 0xf;
|
||||
|
||||
return ((zrange & 0xFFFFF) << 12) |
|
||||
((smem & 0x3) << 8) |
|
||||
((sresults & 0xF) << 4) |
|
||||
((zmask & 0xF) << 0);
|
||||
}
|
||||
if (tex->htile_stencil_disabled)
|
||||
return HTILE_Z_CLEAR_REG(depth);
|
||||
else
|
||||
return HTILE_ZS_CLEAR_REG(depth);
|
||||
}
|
||||
|
||||
static bool si_can_fast_clear_depth(struct si_texture *zstex, unsigned level, float depth,
|
||||
|
|
|
|||
|
|
@ -1264,14 +1264,10 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
|
|||
CMASK_MSAA_FMASK_CLEAR_0_COLOR_EXPANDED);
|
||||
}
|
||||
if (tex->is_depth && tex->surface.meta_offset) {
|
||||
uint32_t clear_value = 0;
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX9 || tex->tc_compatible_htile)
|
||||
clear_value = 0x0000030F;
|
||||
|
||||
assert(num_clears < ARRAY_SIZE(clears));
|
||||
si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
|
||||
tex->surface.meta_size, clear_value);
|
||||
tex->surface.meta_size,
|
||||
tex->htile_stencil_disabled ? HTILE_Z_UNCOMPRESSED : HTILE_ZS_UNCOMPRESSED);
|
||||
}
|
||||
|
||||
/* Initialize DCC only if the texture is not being imported. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue