(Stephane Marchesin, me) add hyperz support to radeon drm.

Only fast z clear and z buffer compression are working correctly;
hierarchical-z is not.
This commit is contained in:
Roland Scheidegger 2004-12-08 16:43:00 +00:00
parent 98d01f9542
commit c4a87c6883
9 changed files with 406 additions and 15 deletions

View file

@ -2007,6 +2007,18 @@ int radeon_preinit(struct drm_device *dev, unsigned long flags)
dev->dev_private = (void *)dev_priv;
dev_priv->flags = flags;
switch (flags & CHIP_FAMILY_MASK) {
case CHIP_R100:
case CHIP_RV200:
case CHIP_R200:
case CHIP_R300:
dev_priv->flags |= CHIP_HAS_HIERZ;
break;
default:
/* all other chips have no hierarchical z buffer */
break;
}
#ifdef __linux__
/* registers */
if ((ret = drm_initmap(dev, pci_resource_start(dev->pdev, 2),

View file

@ -144,7 +144,8 @@
#define RADEON_EMIT_PP_TEX_SIZE_1 74
#define RADEON_EMIT_PP_TEX_SIZE_2 75
#define R200_EMIT_RB3D_BLENDCOLOR 76
#define RADEON_MAX_STATE_PACKETS 77
#define R200_EMIT_TCL_POINT_SPRITE_CNTL 77
#define RADEON_MAX_STATE_PACKETS 78
/* Commands understood by cmd_buffer ioctl. More can be added but
* obviously these can't be removed or changed:
@ -189,6 +190,9 @@ typedef union {
#define RADEON_BACK 0x2
#define RADEON_DEPTH 0x4
#define RADEON_STENCIL 0x8
#define RADEON_CLEAR_FASTZ 0x80000000
#define RADEON_USE_HIERZ 0x40000000
#define RADEON_USE_COMP_ZBUF 0x20000000
/* Primitive types
*/

View file

@ -42,7 +42,7 @@
#define DRIVER_NAME "radeon"
#define DRIVER_DESC "ATI Radeon"
#define DRIVER_DATE "20020828"
#define DRIVER_DATE "20041207"
/* Interface history:
*
@ -78,10 +78,12 @@
* and GL_EXT_blend_[func|equation]_separate on r200
* 1.12- Add R300 CP microcode support - this just loads the CP on r300
* (No 3D support yet - just microcode loading).
* 1.13- Add packet R200_EMIT_TCL_POINT_SPRITE_CNTL for ARB_point_parameters
* - Add hyperz support, add hyperz flags to clear ioctl.
*/
#define DRIVER_MAJOR 1
#define DRIVER_MINOR 12
#define DRIVER_MINOR 13
#define DRIVER_PATCHLEVEL 0
enum radeon_family {
@ -117,6 +119,7 @@ enum radeon_chip_flags {
CHIP_IS_IGP = 0x00020000UL,
CHIP_SINGLE_CRTC = 0x00040000UL,
CHIP_IS_AGP = 0x00080000UL,
CHIP_HAS_HIERZ = 0x00100000UL,
};
#define GET_RING_HEAD(dev_priv) DRM_READ32( (dev_priv)->ring_rptr, 0 )
@ -466,6 +469,7 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
# define RADEON_STENCIL_ENABLE (1 << 7)
# define RADEON_Z_ENABLE (1 << 8)
#define RADEON_RB3D_DEPTHOFFSET 0x1c24
#define RADEON_RB3D_DEPTHCLEARVALUE 0x3230
#define RADEON_RB3D_DEPTHPITCH 0x1c28
#define RADEON_RB3D_PLANEMASK 0x1d84
#define RADEON_RB3D_STENCILREFMASK 0x1d7c
@ -478,11 +482,15 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
# define RADEON_Z_TEST_MASK (7 << 4)
# define RADEON_Z_TEST_ALWAYS (7 << 4)
# define RADEON_Z_HIERARCHY_ENABLE (1 << 8)
# define RADEON_STENCIL_TEST_ALWAYS (7 << 12)
# define RADEON_STENCIL_S_FAIL_REPLACE (2 << 16)
# define RADEON_STENCIL_ZPASS_REPLACE (2 << 20)
# define RADEON_STENCIL_ZFAIL_REPLACE (2 << 24)
# define RADEON_Z_COMPRESSION_ENABLE (1 << 28)
# define RADEON_FORCE_Z_DIRTY (1 << 29)
# define RADEON_Z_WRITE_ENABLE (1 << 30)
# define RADEON_Z_DECOMPRESSION_ENABLE (1 << 31)
#define RADEON_RBBM_SOFT_RESET 0x00f0
# define RADEON_SOFT_RESET_CP (1 << 0)
# define RADEON_SOFT_RESET_HI (1 << 1)
@ -590,7 +598,7 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
# define RADEON_WAIT_3D_IDLECLEAN (1 << 17)
# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18)
#define RADEON_RB3D_ZMASKOFFSET 0x1c34
#define RADEON_RB3D_ZMASKOFFSET 0x3234
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
# define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
# define RADEON_DEPTH_FORMAT_24BIT_INT_Z (2 << 0)
@ -644,6 +652,8 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
# define RADEON_3D_DRAW_IMMD 0x00002900
# define RADEON_3D_DRAW_INDX 0x00002A00
# define RADEON_3D_LOAD_VBPNTR 0x00002F00
# define RADEON_3D_CLEAR_ZMASK 0x00003200
# define RADEON_3D_CLEAR_HIZ 0x00003700
# define RADEON_CNTL_HOSTDATA_BLT 0x00009400
# define RADEON_CNTL_PAINT_MULTI 0x00009A00
# define RADEON_CNTL_BITBLT_MULTI 0x00009B00
@ -801,6 +811,8 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
#define R200_RB3D_BLENDCOLOR 0x3218
#define R200_SE_TCL_POINT_SPRITE_CNTL 0x22c4
/* Constants */
#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */

View file

@ -271,6 +271,7 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
case RADEON_EMIT_PP_TEX_SIZE_1:
case RADEON_EMIT_PP_TEX_SIZE_2:
case R200_EMIT_RB3D_BLENDCOLOR:
case R200_EMIT_TCL_POINT_SPRITE_CNTL:
/* These packets don't contain memory offsets */
break;
@ -646,7 +647,9 @@ static struct {
RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, {
RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, {
RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, {
R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},};
R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, {
R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
};
/* ================================================================
* Performance monitoring functions
@ -858,11 +861,159 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
}
}
/* hyper z clear */
/* no docs available, based on reverse engineering by Stephane Marchesin */
if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {
int i;
int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?
(dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
u32 clearmask;
u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
((clear->depth_mask & 0xff) << 24);
/* Make sure we restore the 3D state next time.
* we haven't touched any "normal" state - still need this?
*/
dev_priv->sarea_priv->ctx_owner = 0;
if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
/* FIXME : reverse engineer that for Rx00 cards */
/* FIXME : the mask supposedly contains low-res z values. So can't set
just to the max (0xff? or actually 0x3fff?), need to take z clear
value into account? */
/* pattern seems to work for r100, though get slight
rendering errors with glxgears. If hierz is not enabled for r100,
only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
other ones are ignored, and the same clear mask can be used. That's
very different behaviour than R200 which needs different clear mask
and different number of tiles to clear if hierz is enabled or not !?!
*/
clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
}
else {
/* clear mask : chooses the clearing pattern.
rv250: could be used to clear only parts of macrotiles
(but that would get really complicated...)?
bit 0 and 1 (either or both of them ?!?!) are used to
not clear tile (or maybe one of the bits indicates if the tile is
compressed or not), bit 2 and 3 to not clear tile 1,...,.
Pattern is as follows:
| 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
bits -------------------------------------------------
| 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
rv100: clearmask covers 2x8 4x1 tiles, but one clear still
covers 256 pixels ?!?
*/
clearmask = 0x0;
}
BEGIN_RING( 8 );
RADEON_WAIT_UNTIL_2D_IDLE();
OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
tempRB3D_DEPTHCLEARVALUE);
/* what offset is this exactly ? */
OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
/* need ctlstat, otherwise get some strange black flickering */
OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
ADVANCE_RING();
for (i = 0; i < nbox; i++) {
int tileoffset, nrtilesx, nrtilesy, j;
/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
/* FIXME : figure this out for r200 (when hierz is enabled). Or
maybe r200 actually doesn't need to put the low-res z value into
the tile cache like r100, but just needs to clear the hi-level z-buffer?
Works for R100, both with hierz and without.
R100 seems to operate on 2x1 8x8 tiles, but...
odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
problematic with resolutions which are not 64 pix aligned? */
tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
for (j = 0; j <= nrtilesy; j++) {
BEGIN_RING( 4 );
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
/* first tile */
OUT_RING( tileoffset * 8 );
/* the number of tiles to clear */
OUT_RING( nrtilesx + 4 );
/* clear mask : chooses the clearing pattern. */
OUT_RING( clearmask );
ADVANCE_RING();
tileoffset += depthpixperline >> 6;
}
}
else if (dev_priv->microcode_version==UCODE_R200) {
/* works for rv250. */
/* find first macro tile (8x2 4x4 z-pixels on rv250) */
tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
for (j = 0; j <= nrtilesy; j++) {
BEGIN_RING( 4 );
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
/* first tile */
/* judging by the first tile offset needed, could possibly
directly address/clear 4x4 tiles instead of 8x2 * 4x4
macro tiles, though would still need clear mask for
right/bottom if truly 4x4 granularity is desired ? */
OUT_RING( tileoffset * 16 );
/* the number of tiles to clear */
OUT_RING( nrtilesx + 1 );
/* clear mask : chooses the clearing pattern. */
OUT_RING( clearmask );
ADVANCE_RING();
tileoffset += depthpixperline >> 5;
}
}
else { /* rv 100 */
/* rv100 might not need 64 pix alignment, who knows */
/* offsets are, hmm, weird */
tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
for (j = 0; j <= nrtilesy; j++) {
BEGIN_RING( 4 );
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
OUT_RING( tileoffset * 128 );
/* the number of tiles to clear */
OUT_RING( nrtilesx + 4 );
/* clear mask : chooses the clearing pattern. */
OUT_RING( clearmask );
ADVANCE_RING();
tileoffset += depthpixperline >> 6;
}
}
}
/* TODO don't always clear all hi-level z tiles */
if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
&& (flags & RADEON_USE_HIERZ))
/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
/* FIXME : the mask supposedly contains low-res z values. So can't set
just to the max (0xff? or actually 0x3fff?), need to take z clear
value into account? */
{
BEGIN_RING( 4 );
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
OUT_RING( 0x0 ); /* First tile */
OUT_RING( 0x3cc0 );
OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
ADVANCE_RING();
}
}
/* We have to clear the depth and/or stencil buffers by
* rendering a quad into just those buffers. Thus, we have to
* make sure the 3D engine is configured correctly.
*/
if ((dev_priv->microcode_version == UCODE_R200) && (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
else if ((dev_priv->microcode_version == UCODE_R200) &&
(flags & (RADEON_DEPTH | RADEON_STENCIL))) {
int tempPP_CNTL;
int tempRE_CNTL;
@ -929,6 +1080,14 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
tempRB3D_STENCILREFMASK = 0x00000000;
}
if (flags & RADEON_USE_COMP_ZBUF) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
RADEON_Z_DECOMPRESSION_ENABLE;
}
if (flags & RADEON_USE_HIERZ) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
}
BEGIN_RING(26);
RADEON_WAIT_UNTIL_2D_IDLE();
@ -979,6 +1138,8 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
}
} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
rb3d_cntl = depth_clear->rb3d_cntl;
if (flags & RADEON_DEPTH) {
@ -995,6 +1156,14 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
rb3d_stencilrefmask = 0x00000000;
}
if (flags & RADEON_USE_COMP_ZBUF) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
RADEON_Z_DECOMPRESSION_ENABLE;
}
if (flags & RADEON_USE_HIERZ) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
}
BEGIN_RING(13);
RADEON_WAIT_UNTIL_2D_IDLE();
@ -1002,8 +1171,7 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
OUT_RING(0x00000000);
OUT_RING(rb3d_cntl);
OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL,
depth_clear->rb3d_zstencilcntl);
OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);

View file

@ -42,10 +42,10 @@
#define DRIVER_NAME "radeon"
#define DRIVER_DESC "ATI Radeon"
#define DRIVER_DATE "20020828"
#define DRIVER_DATE "20041207"
#define DRIVER_MAJOR 1
#define DRIVER_MINOR 12
#define DRIVER_MINOR 13
#define DRIVER_PATCHLEVEL 0
/* Interface history:
@ -82,6 +82,8 @@
* and GL_EXT_blend_[func|equation]_separate on r200
* 1.12- Add R300 CP microcode support - this just loads the CP on r300
* (No 3D support yet - just microcode loading).
* 1.13- Add packet R200_EMIT_TCL_POINT_SPRITE_CNTL for ARB_point_parameters
* - Add hyperz support, add hyperz flags to clear ioctl.
*/
#define DRIVER_IOCTLS \
[DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { radeon_cp_buffers, 1, 0 }, \

View file

@ -2017,6 +2017,18 @@ int radeon_preinit( struct drm_device *dev, unsigned long flags )
dev->dev_private = (void *)dev_priv;
dev_priv->flags = flags;
switch (flags & CHIP_FAMILY_MASK) {
case CHIP_R100:
case CHIP_RV200:
case CHIP_R200:
case CHIP_R300:
dev_priv->flags |= CHIP_HAS_HIERZ;
break;
default:
/* all other chips have no hierarchical z buffer */
break;
}
/* registers */
if( (ret = DRM(initmap)( dev, pci_resource_start( dev->pdev, 2 ),
pci_resource_len( dev->pdev, 2 ), _DRM_REGISTERS, 0 )))

View file

@ -145,7 +145,8 @@
#define RADEON_EMIT_PP_TEX_SIZE_1 74
#define RADEON_EMIT_PP_TEX_SIZE_2 75
#define R200_EMIT_RB3D_BLENDCOLOR 76
#define RADEON_MAX_STATE_PACKETS 77
#define R200_EMIT_TCL_POINT_SPRITE_CNTL 77
#define RADEON_MAX_STATE_PACKETS 78
/* Commands understood by cmd_buffer ioctl. More can be added but
@ -193,6 +194,9 @@ typedef union {
#define RADEON_BACK 0x2
#define RADEON_DEPTH 0x4
#define RADEON_STENCIL 0x8
#define RADEON_CLEAR_FASTZ 0x80000000
#define RADEON_USE_HIERZ 0x40000000
#define RADEON_USE_COMP_ZBUF 0x20000000
/* Primitive types
*/

View file

@ -68,6 +68,7 @@ enum radeon_chip_flags {
CHIP_IS_IGP = 0x00020000UL,
CHIP_SINGLE_CRTC = 0x00040000UL,
CHIP_IS_AGP = 0x00080000UL,
CHIP_HAS_HIERZ = 0x00100000UL,
};
#define GET_RING_HEAD(dev_priv) DRM_READ32( (dev_priv)->ring_rptr, 0 )
@ -411,6 +412,7 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
# define RADEON_STENCIL_ENABLE (1 << 7)
# define RADEON_Z_ENABLE (1 << 8)
#define RADEON_RB3D_DEPTHOFFSET 0x1c24
#define RADEON_RB3D_DEPTHCLEARVALUE 0x3230
#define RADEON_RB3D_DEPTHPITCH 0x1c28
#define RADEON_RB3D_PLANEMASK 0x1d84
#define RADEON_RB3D_STENCILREFMASK 0x1d7c
@ -423,11 +425,15 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
# define RADEON_Z_TEST_MASK (7 << 4)
# define RADEON_Z_TEST_ALWAYS (7 << 4)
# define RADEON_Z_HIERARCHY_ENABLE (1 << 8)
# define RADEON_STENCIL_TEST_ALWAYS (7 << 12)
# define RADEON_STENCIL_S_FAIL_REPLACE (2 << 16)
# define RADEON_STENCIL_ZPASS_REPLACE (2 << 20)
# define RADEON_STENCIL_ZFAIL_REPLACE (2 << 24)
# define RADEON_Z_COMPRESSION_ENABLE (1 << 28)
# define RADEON_FORCE_Z_DIRTY (1 << 29)
# define RADEON_Z_WRITE_ENABLE (1 << 30)
# define RADEON_Z_DECOMPRESSION_ENABLE (1 << 31)
#define RADEON_RBBM_SOFT_RESET 0x00f0
# define RADEON_SOFT_RESET_CP (1 << 0)
# define RADEON_SOFT_RESET_HI (1 << 1)
@ -535,7 +541,7 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
# define RADEON_WAIT_3D_IDLECLEAN (1 << 17)
# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18)
#define RADEON_RB3D_ZMASKOFFSET 0x1c34
#define RADEON_RB3D_ZMASKOFFSET 0x3234
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
# define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
# define RADEON_DEPTH_FORMAT_24BIT_INT_Z (2 << 0)
@ -590,6 +596,8 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
# define RADEON_3D_DRAW_IMMD 0x00002900
# define RADEON_3D_DRAW_INDX 0x00002A00
# define RADEON_3D_LOAD_VBPNTR 0x00002F00
# define RADEON_3D_CLEAR_ZMASK 0x00003200
# define RADEON_3D_CLEAR_HIZ 0x00003700
# define RADEON_CNTL_HOSTDATA_BLT 0x00009400
# define RADEON_CNTL_PAINT_MULTI 0x00009A00
# define RADEON_CNTL_BITBLT_MULTI 0x00009B00
@ -748,6 +756,8 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
#define R200_RB3D_BLENDCOLOR 0x3218
#define R200_SE_TCL_POINT_SPRITE_CNTL 0x22c4
/* Constants */
#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */

View file

@ -205,6 +205,7 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_
case RADEON_EMIT_PP_TEX_SIZE_1:
case RADEON_EMIT_PP_TEX_SIZE_2:
case R200_EMIT_RB3D_BLENDCOLOR:
case R200_EMIT_TCL_POINT_SPRITE_CNTL:
/* These packets don't contain memory offsets */
break;
@ -569,6 +570,7 @@ static struct {
{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
};
@ -780,12 +782,159 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
}
}
/* hyper z clear */
/* no docs available, based on reverse engineering by Stephane Marchesin */
if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {
int i;
int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?
(dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
u32 clearmask;
u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
((clear->depth_mask & 0xff) << 24);
/* Make sure we restore the 3D state next time.
* we haven't touched any "normal" state - still need this?
*/
dev_priv->sarea_priv->ctx_owner = 0;
if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
/* FIXME : reverse engineer that for Rx00 cards */
/* FIXME : the mask supposedly contains low-res z values. So can't set
just to the max (0xff? or actually 0x3fff?), need to take z clear
value into account? */
/* pattern seems to work for r100, though get slight
rendering errors with glxgears. If hierz is not enabled for r100,
only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
other ones are ignored, and the same clear mask can be used. That's
very different behaviour than R200 which needs different clear mask
and different number of tiles to clear if hierz is enabled or not !?!
*/
clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
}
else {
/* clear mask : chooses the clearing pattern.
rv250: could be used to clear only parts of macrotiles
(but that would get really complicated...)?
bit 0 and 1 (either or both of them ?!?!) are used to
not clear tile (or maybe one of the bits indicates if the tile is
compressed or not), bit 2 and 3 to not clear tile 1,...,.
Pattern is as follows:
| 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
bits -------------------------------------------------
| 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
rv100: clearmask covers 2x8 4x1 tiles, but one clear still
covers 256 pixels ?!?
*/
clearmask = 0x0;
}
BEGIN_RING( 8 );
RADEON_WAIT_UNTIL_2D_IDLE();
OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
tempRB3D_DEPTHCLEARVALUE);
/* what offset is this exactly ? */
OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
/* need ctlstat, otherwise get some strange black flickering */
OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
ADVANCE_RING();
for (i = 0; i < nbox; i++) {
int tileoffset, nrtilesx, nrtilesy, j;
/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
/* FIXME : figure this out for r200 (when hierz is enabled). Or
maybe r200 actually doesn't need to put the low-res z value into
the tile cache like r100, but just needs to clear the hi-level z-buffer?
Works for R100, both with hierz and without.
R100 seems to operate on 2x1 8x8 tiles, but...
odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
problematic with resolutions which are not 64 pix aligned? */
tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
for (j = 0; j <= nrtilesy; j++) {
BEGIN_RING( 4 );
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
/* first tile */
OUT_RING( tileoffset * 8 );
/* the number of tiles to clear */
OUT_RING( nrtilesx + 4 );
/* clear mask : chooses the clearing pattern. */
OUT_RING( clearmask );
ADVANCE_RING();
tileoffset += depthpixperline >> 6;
}
}
else if (dev_priv->microcode_version==UCODE_R200) {
/* works for rv250. */
/* find first macro tile (8x2 4x4 z-pixels on rv250) */
tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
for (j = 0; j <= nrtilesy; j++) {
BEGIN_RING( 4 );
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
/* first tile */
/* judging by the first tile offset needed, could possibly
directly address/clear 4x4 tiles instead of 8x2 * 4x4
macro tiles, though would still need clear mask for
right/bottom if truly 4x4 granularity is desired ? */
OUT_RING( tileoffset * 16 );
/* the number of tiles to clear */
OUT_RING( nrtilesx + 1 );
/* clear mask : chooses the clearing pattern. */
OUT_RING( clearmask );
ADVANCE_RING();
tileoffset += depthpixperline >> 5;
}
}
else { /* rv 100 */
/* rv100 might not need 64 pix alignment, who knows */
/* offsets are, hmm, weird */
tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
for (j = 0; j <= nrtilesy; j++) {
BEGIN_RING( 4 );
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
OUT_RING( tileoffset * 128 );
/* the number of tiles to clear */
OUT_RING( nrtilesx + 4 );
/* clear mask : chooses the clearing pattern. */
OUT_RING( clearmask );
ADVANCE_RING();
tileoffset += depthpixperline >> 6;
}
}
}
/* TODO don't always clear all hi-level z tiles */
if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
&& (flags & RADEON_USE_HIERZ))
/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
/* FIXME : the mask supposedly contains low-res z values. So can't set
just to the max (0xff? or actually 0x3fff?), need to take z clear
value into account? */
{
BEGIN_RING( 4 );
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
OUT_RING( 0x0 ); /* First tile */
OUT_RING( 0x3cc0 );
OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
ADVANCE_RING();
}
}
/* We have to clear the depth and/or stencil buffers by
* rendering a quad into just those buffers. Thus, we have to
* make sure the 3D engine is configured correctly.
*/
if ( (dev_priv->microcode_version==UCODE_R200) &&
(flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
else if ((dev_priv->microcode_version == UCODE_R200) &&
(flags & (RADEON_DEPTH | RADEON_STENCIL))) {
int tempPP_CNTL;
int tempRE_CNTL;
@ -855,6 +1004,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
tempRB3D_STENCILREFMASK = 0x00000000;
}
if (flags & RADEON_USE_COMP_ZBUF) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
RADEON_Z_DECOMPRESSION_ENABLE;
}
if (flags & RADEON_USE_HIERZ) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
}
BEGIN_RING( 26 );
RADEON_WAIT_UNTIL_2D_IDLE();
@ -909,6 +1066,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
}
else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
rb3d_cntl = depth_clear->rb3d_cntl;
if ( flags & RADEON_DEPTH ) {
@ -925,6 +1084,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
rb3d_stencilrefmask = 0x00000000;
}
if (flags & RADEON_USE_COMP_ZBUF) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
RADEON_Z_DECOMPRESSION_ENABLE;
}
if (flags & RADEON_USE_HIERZ) {
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
}
BEGIN_RING( 13 );
RADEON_WAIT_UNTIL_2D_IDLE();
@ -933,7 +1100,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
OUT_RING( rb3d_cntl );
OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
depth_clear->rb3d_zstencilcntl );
tempRB3D_ZSTENCILCNTL );
OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
rb3d_stencilrefmask );
OUT_RING_REG( RADEON_RB3D_PLANEMASK,