diff --git a/src/amd/common/ac_descriptors.h b/src/amd/common/ac_descriptors.h index 91f0079430a..99cafa8a110 100644 --- a/src/amd/common/ac_descriptors.h +++ b/src/amd/common/ac_descriptors.h @@ -67,6 +67,100 @@ enum { CMASK_8xMSAA_FMASK_UNCOMPRESSED_COLOR_EXPANDED = CMASK_MSAA_CODE(3, 3), }; +typedef union { + /* Z only */ + struct { + unsigned zmask : 4; + unsigned minz : 14; + unsigned maxz : 14; + } z; + + struct { + unsigned zmask : 4; + /* SR0/SR1 contain stencil pretest results. */ + unsigned sr0 : 2; + unsigned sr1 : 2; + unsigned smem : 2; + unsigned unused : 2; + /* The Z Range consists of a 6-bit delta and 14-bit base. + * ZRANGE_PRECISION determines whether zbase means minZ or maxZ. + */ + unsigned zdelta : 6; + unsigned zbase : 14; + } zs; + + /* Z + VRS. VRS fields are 0-based: (0, 0) means VRS 1x1. */ + struct { + unsigned zmask : 4; + unsigned sr0 : 2; + unsigned vrs_x : 2; + unsigned smem : 2; + unsigned vrs_y : 2; + unsigned zdelta : 6; + unsigned zbase : 14; + } zs_vrs; + + uint32_t dword; +} ac_htile_dword; + +#define HTILE_Z_CODE(...) ((ac_htile_dword){.z = {__VA_ARGS__}}).dword +#define HTILE_ZS_CODE(...) ((ac_htile_dword){.zs = {__VA_ARGS__}}).dword +#define HTILE_ZS_VRS_CODE(...) ((ac_htile_dword){.zs_vrs = {__VA_ARGS__}}).dword + +/* depth must be in [0, 1]. This only clears HiZ and sets the Z/S state to "cleared". + * The DB registers contain the full clear values. 
+ */ +#define HTILE_Z_CLEAR_REG(depth) HTILE_Z_CODE( \ + .zmask = 0, \ + .minz = lroundf((depth) * 0x3FFF), \ + .maxz = lroundf((depth) * 0x3FFF)) + +#define HTILE_ZS_CLEAR_REG(depth) HTILE_ZS_CODE( \ + .zmask = 0, \ + .sr0 = 0x3, \ + .sr1 = 0x3, \ + .smem = 0, \ + .zdelta = 0, \ + .zbase = lroundf((depth) * 0x3FFF)) + +#define HTILE_ZS_VRS_CLEAR_REG(depth) HTILE_ZS_VRS_CODE( \ + .zmask = 0, \ + .sr0 = 0x3, \ + .smem = 0, \ + .zdelta = 0, \ + .zbase = lroundf((depth) * 0x3FFF), \ + .vrs_x = 0, /* VRS = 1x1 (0-based) */ \ + .vrs_y = 0) + +/* Zmask = Z uncompressed, minZ = 0, maxZ = 1. */ +#define HTILE_Z_UNCOMPRESSED HTILE_Z_CODE( \ + .zmask = 0xF, \ + .minz = 0, \ + .maxz = 0x3FFF) + +/* Zmask = Z uncompressed, SR0/SR1 = Stencil pretest is unknown, Smem = Stencil uncompressed, + * ZRange = [0, 1]. + */ +#define HTILE_ZS_UNCOMPRESSED HTILE_ZS_CODE( \ + .zmask = 0xF, \ + .sr0 = 0x3, \ + .sr1 = 0x3, \ + .smem = 0x3, \ + .zdelta = 0x3F, \ + .zbase = 0x3FFF) + +/* Zmask = Z uncompressed, SR0 = Stencil pretest is unknown, Smem = Stencil uncompressed, + * ZRange = [0, 1], VRS = 1x1 (0-based). + */ +#define HTILE_ZS_VRS_UNCOMPRESSED HTILE_ZS_VRS_CODE( \ + .zmask = 0xF, \ + .sr0 = 0x3, \ + .smem = 0x3, \ + .zdelta = 0x3F, \ + .zbase = 0x3FFF, \ + .vrs_x = 0, /* VRS = 1x1 (0-based) */ \ + .vrs_y = 0) + unsigned ac_map_swizzle(unsigned swizzle); diff --git a/src/amd/vulkan/meta/radv_meta_clear.c b/src/amd/vulkan/meta/radv_meta_clear.c index d591ccf980c..155382a2317 100644 --- a/src/amd/vulkan/meta/radv_meta_clear.c +++ b/src/amd/vulkan/meta/radv_meta_clear.c @@ -679,47 +679,14 @@ static uint32_t radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image, VkClearDepthStencilValue value) { - uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */ - uint32_t zmask = 0, smem = 0; - uint32_t htile_value; - uint32_t zmin, zmax; - - /* Convert the depth value to 14-bit zmin/zmax values. 
*/ - zmin = lroundf(value.depth * max_zval); - zmax = zmin; - if (radv_image_tile_stencil_disabled(device, image)) { - /* Z only (no stencil): - * - * |31 18|17 4|3 0| - * +---------+---------+-------+ - * | Max Z | Min Z | ZMask | - */ - htile_value = (((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0)); + return HTILE_Z_CLEAR_REG(value.depth); } else { - - /* Z and stencil: - * - * |31 12|11 10|9 8|7 6|5 4|3 0| - * +-----------+-----+------+-----+-----+-------+ - * | Z Range | | SMem | SR1 | SR0 | ZMask | - * - * Z, stencil, 4 bit VRS encoding: - * |31 12| 11 10 |9 8|7 6 |5 4|3 0| - * +-----------+------------+------+------------+-----+-------+ - * | Z Range | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask | - */ - uint32_t delta = 0; - uint32_t zrange = ((zmax << 6) | delta); - uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */ - if (radv_image_has_vrs_htile(device, image)) - sresults = 0x3; - - htile_value = (((zrange & 0xfffff) << 12) | ((smem & 0x3) << 8) | ((sresults & 0xf) << 4) | ((zmask & 0xf) << 0)); + return HTILE_ZS_VRS_CLEAR_REG(value.depth); + else + return HTILE_ZS_CLEAR_REG(value.depth); } - - return htile_value; } static uint32_t @@ -727,32 +694,52 @@ radv_get_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image bool is_clear) { const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); - uint32_t mask = 0; if (radv_image_tile_stencil_disabled(device, image)) { /* All the HTILE buffer is used when there is no stencil. */ - mask = UINT32_MAX; + return UINT32_MAX; } else { - if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) - mask |= 0xfffffc0f; - if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) - mask |= 0x000003f0; - - /* Preserve VRS rates during clears but not during initialization. 
*/ - if (is_clear && radv_image_has_vrs_htile(device, image)) { - mask &= ~(0x3 << 6); /* VRS X-rate */ - mask &= ~(0x3 << 10); /* VRS Y-rate */ - } - if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { /* Clear both aspects on SDMA, it's not ideal but there is no other way to initialize the * HTILE buffer. */ - mask = UINT32_MAX; + return UINT32_MAX; } - } - return mask; + uint32_t mask = 0; + + if (radv_image_has_vrs_htile(device, image)) { + /* Preserve VRS rates during clears but not during initialization. */ + uint32_t vrs_mask = is_clear ? 0 : 0x3; + + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + mask |= HTILE_ZS_VRS_CODE(.zmask = 0xF, + .zdelta = 0x3F, + .zbase = 0x3FFF, + .vrs_x = vrs_mask, + .vrs_y = vrs_mask); + } + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + mask |= HTILE_ZS_VRS_CODE(.sr0 = 0x3, + .smem = 0x3, + .vrs_x = vrs_mask, + .vrs_y = vrs_mask); + } + } else { + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { + mask |= HTILE_ZS_CODE(.zmask = 0xF, + .zdelta = 0x3F, + .zbase = 0x3FFF); + } + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + mask |= HTILE_ZS_CODE(.sr0 = 0x3, + .sr1 = 0x3, + .smem = 0x3); + } + } + + return mask; + } } static bool diff --git a/src/amd/vulkan/radv_image.h b/src/amd/vulkan/radv_image.h index d9b6d3e2885..fa360d740dd 100644 --- a/src/amd/vulkan/radv_image.h +++ b/src/amd/vulkan/radv_image.h @@ -296,40 +296,15 @@ radv_get_hiz_valid_va(const struct radv_image *image, uint32_t base_level) static inline uint32_t radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image) { - uint32_t initial_value; - if (radv_image_tile_stencil_disabled(device, image)) { - /* Z only (no stencil): - * - * |31 18|17 4|3 0| - * +---------+---------+-------+ - * | Max Z | Min Z | ZMask | - */ - initial_value = 0xfffc000f; + return HTILE_Z_UNCOMPRESSED; } else { - /* Z and stencil: - * - * |31 12|11 10|9 8|7 6|5 4|3 0| - * +-----------+-----+------+-----+-----+-------+ - * | Z Range | | SMem | SR1 | SR0 | ZMask | - * - * 
SR0/SR1 contains the stencil test results. Initializing - * SR0/SR1 to 0x3 means the stencil test result is unknown. - * - * Z, stencil and 4 bit VRS encoding: - * |31 12|11 10|9 8|7 6|5 4|3 0| - * +-----------+------------+------+------------+-----+-------+ - * | Z Range | VRS y-rate | SMem | VRS x-rate | SR0 | ZMask | - */ if (radv_image_has_vrs_htile(device, image)) { - /* Initialize the VRS x-rate value at 0, so the hw interprets it as 1 sample. */ - initial_value = 0xfffff33f; + return HTILE_ZS_VRS_UNCOMPRESSED; } else { - initial_value = 0xfffff3ff; + return HTILE_ZS_UNCOMPRESSED; } } - - return initial_value; } static inline uint32_t diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index e88eb21025b..cd476831295 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -573,52 +573,10 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_ static uint32_t si_get_htile_clear_value(struct si_texture *tex, float depth) { - /* Maximum 14-bit UINT value. */ - const uint32_t max_z_value = 0x3FFF; - - /* For clears, Zmask and Smem will always be set to zero. */ - const uint32_t zmask = 0; - const uint32_t smem = 0; - - /* Convert depthValue to 14-bit zmin/zmax uint values. */ - const uint32_t zmin = lroundf(depth * max_z_value); - const uint32_t zmax = zmin; - - if (tex->htile_stencil_disabled) { - /* Z-only HTILE is laid out as follows: - * |31 18|17 4|3 0| - * +---------+---------+-------+ - * | Max Z | Min Z | ZMask | - */ - return ((zmax & 0x3FFF) << 18) | - ((zmin & 0x3FFF) << 4) | - ((zmask & 0xF) << 0); - } else { - /* Z+S HTILE is laid out as-follows: - * |31 12|11 10|9 8|7 6|5 4|3 0| - * +-----------+-----+------+-----+-----+-------+ - * | Z Range | | SMem | SR1 | SR0 | ZMask | - * - * The base value for zRange is either zMax or zMin, depending on ZRANGE_PRECISION. - * For a fast clear, zMin == zMax == clearValue. 
This means that the base will - * always be the clear value (converted to 14-bit UINT). - * - * When abs(zMax-zMin) < 16, the delta is equal to the difference. In the case of - * fast clears, where zMax == zMin, the delta is always zero. - */ - const uint32_t delta = 0; - const uint32_t zrange = (zmax << 6) | delta; - - /* SResults 0 & 1 are set based on the stencil compare state. - * For fast-clear, the default value of sr0 and sr1 are both 0x3. - */ - const uint32_t sresults = 0xf; - - return ((zrange & 0xFFFFF) << 12) | - ((smem & 0x3) << 8) | - ((sresults & 0xF) << 4) | - ((zmask & 0xF) << 0); - } + if (tex->htile_stencil_disabled) + return HTILE_Z_CLEAR_REG(depth); + else + return HTILE_ZS_CLEAR_REG(depth); } static bool si_can_fast_clear_depth(struct si_texture *zstex, unsigned level, float depth, diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index ac6e228561f..54a4ad5babe 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -1264,14 +1264,10 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, CMASK_MSAA_FMASK_CLEAR_0_COLOR_EXPANDED); } if (tex->is_depth && tex->surface.meta_offset) { - uint32_t clear_value = 0; - - if (sscreen->info.gfx_level >= GFX9 || tex->tc_compatible_htile) - clear_value = 0x0000030F; - assert(num_clears < ARRAY_SIZE(clears)); si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, - tex->surface.meta_size, clear_value); + tex->surface.meta_size, + tex->htile_stencil_disabled ? HTILE_Z_UNCOMPRESSED : HTILE_ZS_UNCOMPRESSED); } /* Initialize DCC only if the texture is not being imported. */