mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
i965: Use ISL for CCS layouts
One can now also delete intel_get_non_msrt_mcs_alignment().
v2 (Jason): Do not leak aux buf but allocate only after getting
ISL surfaces.
Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
parent
96dbe765e1
commit
544ed74315
2 changed files with 36 additions and 102 deletions
|
|
@ -101,66 +101,6 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* For single-sampled render targets ("non-MSRT"), the MCS buffer is a
|
||||
* scaled-down bitfield representation of the color buffer which is capable of
|
||||
* recording when blocks of the color buffer are equal to the clear value.
|
||||
* This function returns the block size that will be used by the MCS buffer
|
||||
* corresponding to a certain color miptree.
|
||||
*
|
||||
* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
|
||||
* beneath the "Fast Color Clear" bullet (p327):
|
||||
*
|
||||
* The following table describes the RT alignment
|
||||
*
|
||||
* Pixels Lines
|
||||
* TiledY RT CL
|
||||
* bpp
|
||||
* 32 8 4
|
||||
* 64 4 4
|
||||
* 128 2 4
|
||||
* TiledX RT CL
|
||||
* bpp
|
||||
* 32 16 2
|
||||
* 64 8 2
|
||||
* 128 4 2
|
||||
*
|
||||
* This alignment has the following uses:
|
||||
*
|
||||
* - For figuring out the size of the MCS buffer. Each 4k tile in the MCS
|
||||
* buffer contains 128 blocks horizontally and 256 blocks vertically.
|
||||
*
|
||||
* - For figuring out alignment restrictions for a fast clear operation. Fast
|
||||
* clear operations must always clear aligned multiples of 16 blocks
|
||||
* horizontally and 32 blocks vertically.
|
||||
*
|
||||
* - For scaling down the coordinates sent through the render pipeline during
|
||||
* a fast clear. X coordinates must be scaled down by 8 times the block
|
||||
* width, and Y coordinates by 16 times the block height.
|
||||
*
|
||||
* - For scaling down the coordinates sent through the render pipeline during
|
||||
* a "Render Target Resolve" operation. X coordinates must be scaled down
|
||||
* by half the block width, and Y coordinates by half the block height.
|
||||
*/
|
||||
void
|
||||
intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt,
|
||||
unsigned *width_px, unsigned *height)
|
||||
{
|
||||
switch (mt->tiling) {
|
||||
default:
|
||||
unreachable("Non-MSRT MCS requires X or Y tiling");
|
||||
/* In release builds, fall through */
|
||||
case I915_TILING_Y:
|
||||
*width_px = 32 / mt->cpp;
|
||||
*height = 4;
|
||||
break;
|
||||
case I915_TILING_X:
|
||||
*width_px = 64 / mt->cpp;
|
||||
*height = 2;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
intel_tiling_supports_non_msrt_mcs(const struct brw_context *brw,
|
||||
unsigned tiling)
|
||||
|
|
@ -1654,55 +1594,53 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
|
|||
assert(!mt->disable_aux_buffers);
|
||||
assert(!mt->no_ccs);
|
||||
|
||||
/* The format of the MCS buffer is opaque to the driver; all that matters
|
||||
* is that we get its size and pitch right. We'll pretend that the format
|
||||
* is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
|
||||
* R32 buffer is 32 pixels across, we'll need to scale the width down by
|
||||
* the block width and then a further factor of 4. Since an MCS tile
|
||||
* covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
|
||||
* we'll need to scale the height down by the block height and then a
|
||||
* further factor of 8.
|
||||
struct isl_surf temp_main_surf;
|
||||
struct isl_surf temp_ccs_surf;
|
||||
|
||||
/* First create an ISL representation of the main color surface and let ISL
|
||||
* calculate equivalent CCS surface against it.
|
||||
*/
|
||||
const mesa_format format = MESA_FORMAT_R_UINT32;
|
||||
unsigned block_width_px;
|
||||
unsigned block_height;
|
||||
intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height);
|
||||
unsigned width_divisor = block_width_px * 4;
|
||||
unsigned height_divisor = block_height * 8;
|
||||
intel_miptree_get_isl_surf(brw, mt, &temp_main_surf);
|
||||
if (!isl_surf_get_ccs_surf(&brw->isl_dev, &temp_main_surf, &temp_ccs_surf))
|
||||
return false;
|
||||
|
||||
/* The Skylake MCS is twice as tall as the Broadwell MCS.
|
||||
*
|
||||
* In pre-Skylake, each bit in the MCS contained the state of 2 cachelines
|
||||
* in the main surface. In Skylake, it's two bits. The extra bit
|
||||
* doubles the MCS height, not width, because in Skylake the MCS is always
|
||||
* Y-tiled.
|
||||
*/
|
||||
if (brw->gen >= 9)
|
||||
height_divisor /= 2;
|
||||
assert(temp_ccs_surf.size &&
|
||||
(temp_ccs_surf.size % temp_ccs_surf.row_pitch == 0));
|
||||
|
||||
unsigned mcs_width =
|
||||
ALIGN(mt->logical_width0, width_divisor) / width_divisor;
|
||||
unsigned mcs_height =
|
||||
ALIGN(mt->logical_height0, height_divisor) / height_divisor;
|
||||
assert(mt->logical_depth0 == 1);
|
||||
struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
|
||||
if (!buf)
|
||||
return false;
|
||||
|
||||
buf->size = temp_ccs_surf.size;
|
||||
buf->pitch = temp_ccs_surf.row_pitch;
|
||||
buf->qpitch = isl_surf_get_array_pitch_sa_rows(&temp_ccs_surf);
|
||||
|
||||
uint32_t layout_flags =
|
||||
(brw->gen >= 8) ? MIPTREE_LAYOUT_FORCE_HALIGN16 : 0;
|
||||
/* In case of compression mcs buffer needs to be initialised requiring the
|
||||
* buffer to be immediately mapped to cpu space for writing. Therefore do
|
||||
* not use the gpu access flag which can cause an unnecessary delay if the
|
||||
* backing pages happened to be just used by the GPU.
|
||||
*/
|
||||
if (!is_lossless_compressed)
|
||||
layout_flags |= MIPTREE_LAYOUT_ACCELERATED_UPLOAD;
|
||||
const uint32_t alloc_flags =
|
||||
is_lossless_compressed ? 0 : BO_ALLOC_FOR_RENDER;
|
||||
uint32_t tiling = I915_TILING_Y;
|
||||
unsigned long pitch;
|
||||
|
||||
mt->mcs_buf = intel_mcs_miptree_buf_create(brw, mt,
|
||||
format,
|
||||
mcs_width,
|
||||
mcs_height,
|
||||
layout_flags);
|
||||
if (!mt->mcs_buf)
|
||||
/* ISL has a stricter set of alignment rules than the drm allocator.
|
||||
* Therefore one can pass the ISL dimensions in terms of bytes instead of
|
||||
* trying to recalculate based on different format block sizes.
|
||||
*/
|
||||
buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "ccs-miptree",
|
||||
buf->pitch, buf->size / buf->pitch,
|
||||
1, &tiling, &pitch, alloc_flags);
|
||||
if (buf->bo) {
|
||||
assert(pitch == buf->pitch);
|
||||
assert(tiling == I915_TILING_Y);
|
||||
} else {
|
||||
free(buf);
|
||||
return false;
|
||||
}
|
||||
|
||||
mt->mcs_buf = buf;
|
||||
|
||||
/* From Gen9 onwards single-sampled (non-msrt) auxiliary buffers are
|
||||
* used for lossless compression which requires similar initialisation
|
||||
|
|
|
|||
|
|
@ -658,10 +658,6 @@ struct intel_mipmap_tree
|
|||
GLuint refcount;
|
||||
};
|
||||
|
||||
void
|
||||
intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt,
|
||||
unsigned *width_px, unsigned *height);
|
||||
|
||||
bool
|
||||
intel_miptree_is_lossless_compressed(const struct brw_context *brw,
|
||||
const struct intel_mipmap_tree *mt);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue