mirror of
https://gitlab.freedesktop.org/mesa/drm.git
synced 2026-05-22 16:18:14 +02:00
(Stephane Marchesin, me) add hyperz support to radeon drm. Only fast z
clear and z buffer compression are working correctly, hierarchical-z is
not.
This commit is contained in:
parent
98d01f9542
commit
c4a87c6883
9 changed files with 406 additions and 15 deletions
|
|
@ -2007,6 +2007,18 @@ int radeon_preinit(struct drm_device *dev, unsigned long flags)
|
|||
dev->dev_private = (void *)dev_priv;
|
||||
dev_priv->flags = flags;
|
||||
|
||||
switch (flags & CHIP_FAMILY_MASK) {
|
||||
case CHIP_R100:
|
||||
case CHIP_RV200:
|
||||
case CHIP_R200:
|
||||
case CHIP_R300:
|
||||
dev_priv->flags |= CHIP_HAS_HIERZ;
|
||||
break;
|
||||
default:
|
||||
/* all other chips have no hierarchical z buffer */
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
/* registers */
|
||||
if ((ret = drm_initmap(dev, pci_resource_start(dev->pdev, 2),
|
||||
|
|
|
|||
|
|
@ -144,7 +144,8 @@
|
|||
#define RADEON_EMIT_PP_TEX_SIZE_1 74
|
||||
#define RADEON_EMIT_PP_TEX_SIZE_2 75
|
||||
#define R200_EMIT_RB3D_BLENDCOLOR 76
|
||||
#define RADEON_MAX_STATE_PACKETS 77
|
||||
#define R200_EMIT_TCL_POINT_SPRITE_CNTL 77
|
||||
#define RADEON_MAX_STATE_PACKETS 78
|
||||
|
||||
/* Commands understood by cmd_buffer ioctl. More can be added but
|
||||
* obviously these can't be removed or changed:
|
||||
|
|
@ -189,6 +190,9 @@ typedef union {
|
|||
#define RADEON_BACK 0x2
|
||||
#define RADEON_DEPTH 0x4
|
||||
#define RADEON_STENCIL 0x8
|
||||
#define RADEON_CLEAR_FASTZ 0x80000000
|
||||
#define RADEON_USE_HIERZ 0x40000000
|
||||
#define RADEON_USE_COMP_ZBUF 0x20000000
|
||||
|
||||
/* Primitive types
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@
|
|||
|
||||
#define DRIVER_NAME "radeon"
|
||||
#define DRIVER_DESC "ATI Radeon"
|
||||
#define DRIVER_DATE "20020828"
|
||||
#define DRIVER_DATE "20041207"
|
||||
|
||||
/* Interface history:
|
||||
*
|
||||
|
|
@ -78,10 +78,12 @@
|
|||
* and GL_EXT_blend_[func|equation]_separate on r200
|
||||
* 1.12- Add R300 CP microcode support - this just loads the CP on r300
|
||||
* (No 3D support yet - just microcode loading).
|
||||
* 1.13- Add packet R200_EMIT_TCL_POINT_SPRITE_CNTL for ARB_point_parameters
|
||||
* - Add hyperz support, add hyperz flags to clear ioctl.
|
||||
*/
|
||||
|
||||
#define DRIVER_MAJOR 1
|
||||
#define DRIVER_MINOR 12
|
||||
#define DRIVER_MINOR 13
|
||||
#define DRIVER_PATCHLEVEL 0
|
||||
|
||||
enum radeon_family {
|
||||
|
|
@ -117,6 +119,7 @@ enum radeon_chip_flags {
|
|||
CHIP_IS_IGP = 0x00020000UL,
|
||||
CHIP_SINGLE_CRTC = 0x00040000UL,
|
||||
CHIP_IS_AGP = 0x00080000UL,
|
||||
CHIP_HAS_HIERZ = 0x00100000UL,
|
||||
};
|
||||
|
||||
#define GET_RING_HEAD(dev_priv) DRM_READ32( (dev_priv)->ring_rptr, 0 )
|
||||
|
|
@ -466,6 +469,7 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
|
|||
# define RADEON_STENCIL_ENABLE (1 << 7)
|
||||
# define RADEON_Z_ENABLE (1 << 8)
|
||||
#define RADEON_RB3D_DEPTHOFFSET 0x1c24
|
||||
#define RADEON_RB3D_DEPTHCLEARVALUE 0x3230
|
||||
#define RADEON_RB3D_DEPTHPITCH 0x1c28
|
||||
#define RADEON_RB3D_PLANEMASK 0x1d84
|
||||
#define RADEON_RB3D_STENCILREFMASK 0x1d7c
|
||||
|
|
@ -478,11 +482,15 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
|
|||
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
|
||||
# define RADEON_Z_TEST_MASK (7 << 4)
|
||||
# define RADEON_Z_TEST_ALWAYS (7 << 4)
|
||||
# define RADEON_Z_HIERARCHY_ENABLE (1 << 8)
|
||||
# define RADEON_STENCIL_TEST_ALWAYS (7 << 12)
|
||||
# define RADEON_STENCIL_S_FAIL_REPLACE (2 << 16)
|
||||
# define RADEON_STENCIL_ZPASS_REPLACE (2 << 20)
|
||||
# define RADEON_STENCIL_ZFAIL_REPLACE (2 << 24)
|
||||
# define RADEON_Z_COMPRESSION_ENABLE (1 << 28)
|
||||
# define RADEON_FORCE_Z_DIRTY (1 << 29)
|
||||
# define RADEON_Z_WRITE_ENABLE (1 << 30)
|
||||
# define RADEON_Z_DECOMPRESSION_ENABLE (1 << 31)
|
||||
#define RADEON_RBBM_SOFT_RESET 0x00f0
|
||||
# define RADEON_SOFT_RESET_CP (1 << 0)
|
||||
# define RADEON_SOFT_RESET_HI (1 << 1)
|
||||
|
|
@ -590,7 +598,7 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
|
|||
# define RADEON_WAIT_3D_IDLECLEAN (1 << 17)
|
||||
# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18)
|
||||
|
||||
#define RADEON_RB3D_ZMASKOFFSET 0x1c34
|
||||
#define RADEON_RB3D_ZMASKOFFSET 0x3234
|
||||
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
|
||||
# define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
|
||||
# define RADEON_DEPTH_FORMAT_24BIT_INT_Z (2 << 0)
|
||||
|
|
@ -644,6 +652,8 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
|
|||
# define RADEON_3D_DRAW_IMMD 0x00002900
|
||||
# define RADEON_3D_DRAW_INDX 0x00002A00
|
||||
# define RADEON_3D_LOAD_VBPNTR 0x00002F00
|
||||
# define RADEON_3D_CLEAR_ZMASK 0x00003200
|
||||
# define RADEON_3D_CLEAR_HIZ 0x00003700
|
||||
# define RADEON_CNTL_HOSTDATA_BLT 0x00009400
|
||||
# define RADEON_CNTL_PAINT_MULTI 0x00009A00
|
||||
# define RADEON_CNTL_BITBLT_MULTI 0x00009B00
|
||||
|
|
@ -801,6 +811,8 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,
|
|||
|
||||
#define R200_RB3D_BLENDCOLOR 0x3218
|
||||
|
||||
#define R200_SE_TCL_POINT_SPRITE_CNTL 0x22c4
|
||||
|
||||
/* Constants */
|
||||
#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */
|
||||
|
||||
|
|
|
|||
|
|
@ -271,6 +271,7 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
|
|||
case RADEON_EMIT_PP_TEX_SIZE_1:
|
||||
case RADEON_EMIT_PP_TEX_SIZE_2:
|
||||
case R200_EMIT_RB3D_BLENDCOLOR:
|
||||
case R200_EMIT_TCL_POINT_SPRITE_CNTL:
|
||||
/* These packets don't contain memory offsets */
|
||||
break;
|
||||
|
||||
|
|
@ -646,7 +647,9 @@ static struct {
|
|||
RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, {
|
||||
RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, {
|
||||
RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, {
|
||||
R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},};
|
||||
R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, {
|
||||
R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
|
||||
};
|
||||
|
||||
/* ================================================================
|
||||
* Performance monitoring functions
|
||||
|
|
@ -858,11 +861,159 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
|
|||
}
|
||||
}
|
||||
|
||||
/* hyper z clear */
|
||||
/* no docs available, based on reverse engineering by Stephane Marchesin */
|
||||
if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {
|
||||
|
||||
int i;
|
||||
int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?
|
||||
(dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
|
||||
|
||||
u32 clearmask;
|
||||
|
||||
u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
|
||||
((clear->depth_mask & 0xff) << 24);
|
||||
|
||||
|
||||
/* Make sure we restore the 3D state next time.
|
||||
* we haven't touched any "normal" state - still need this?
|
||||
*/
|
||||
dev_priv->sarea_priv->ctx_owner = 0;
|
||||
|
||||
if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
|
||||
/* FIXME : reverse engineer that for Rx00 cards */
|
||||
/* FIXME : the mask supposedly contains low-res z values. So can't set
|
||||
just to the max (0xff? or actually 0x3fff?), need to take z clear
|
||||
value into account? */
|
||||
/* pattern seems to work for r100, though get slight
|
||||
rendering errors with glxgears. If hierz is not enabled for r100,
|
||||
only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
|
||||
other ones are ignored, and the same clear mask can be used. That's
|
||||
very different behaviour than R200 which needs different clear mask
|
||||
and different number of tiles to clear if hierz is enabled or not !?!
|
||||
*/
|
||||
clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
|
||||
}
|
||||
else {
|
||||
/* clear mask : chooses the clearing pattern.
|
||||
rv250: could be used to clear only parts of macrotiles
|
||||
(but that would get really complicated...)?
|
||||
bit 0 and 1 (either or both of them ?!?!) are used to
|
||||
not clear tile (or maybe one of the bits indicates if the tile is
|
||||
compressed or not), bit 2 and 3 to not clear tile 1,...,.
|
||||
Pattern is as follows:
|
||||
| 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
|
||||
bits -------------------------------------------------
|
||||
| 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
|
||||
rv100: clearmask covers 2x8 4x1 tiles, but one clear still
|
||||
covers 256 pixels ?!?
|
||||
*/
|
||||
clearmask = 0x0;
|
||||
}
|
||||
|
||||
BEGIN_RING( 8 );
|
||||
RADEON_WAIT_UNTIL_2D_IDLE();
|
||||
OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
|
||||
tempRB3D_DEPTHCLEARVALUE);
|
||||
/* what offset is this exactly ? */
|
||||
OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
|
||||
/* need ctlstat, otherwise get some strange black flickering */
|
||||
OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
|
||||
ADVANCE_RING();
|
||||
|
||||
for (i = 0; i < nbox; i++) {
|
||||
int tileoffset, nrtilesx, nrtilesy, j;
|
||||
/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
|
||||
if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
|
||||
/* FIXME : figure this out for r200 (when hierz is enabled). Or
|
||||
maybe r200 actually doesn't need to put the low-res z value into
|
||||
the tile cache like r100, but just needs to clear the hi-level z-buffer?
|
||||
Works for R100, both with hierz and without.
|
||||
R100 seems to operate on 2x1 8x8 tiles, but...
|
||||
odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
|
||||
problematic with resolutions which are not 64 pix aligned? */
|
||||
tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
|
||||
nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
|
||||
nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
|
||||
for (j = 0; j <= nrtilesy; j++) {
|
||||
BEGIN_RING( 4 );
|
||||
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
|
||||
/* first tile */
|
||||
OUT_RING( tileoffset * 8 );
|
||||
/* the number of tiles to clear */
|
||||
OUT_RING( nrtilesx + 4 );
|
||||
/* clear mask : chooses the clearing pattern. */
|
||||
OUT_RING( clearmask );
|
||||
ADVANCE_RING();
|
||||
tileoffset += depthpixperline >> 6;
|
||||
}
|
||||
}
|
||||
else if (dev_priv->microcode_version==UCODE_R200) {
|
||||
/* works for rv250. */
|
||||
/* find first macro tile (8x2 4x4 z-pixels on rv250) */
|
||||
tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
|
||||
nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
|
||||
nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
|
||||
for (j = 0; j <= nrtilesy; j++) {
|
||||
BEGIN_RING( 4 );
|
||||
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
|
||||
/* first tile */
|
||||
/* judging by the first tile offset needed, could possibly
|
||||
directly address/clear 4x4 tiles instead of 8x2 * 4x4
|
||||
macro tiles, though would still need clear mask for
|
||||
right/bottom if truly 4x4 granularity is desired ? */
|
||||
OUT_RING( tileoffset * 16 );
|
||||
/* the number of tiles to clear */
|
||||
OUT_RING( nrtilesx + 1 );
|
||||
/* clear mask : chooses the clearing pattern. */
|
||||
OUT_RING( clearmask );
|
||||
ADVANCE_RING();
|
||||
tileoffset += depthpixperline >> 5;
|
||||
}
|
||||
}
|
||||
else { /* rv 100 */
|
||||
/* rv100 might not need 64 pix alignment, who knows */
|
||||
/* offsets are, hmm, weird */
|
||||
tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
|
||||
nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
|
||||
nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
|
||||
for (j = 0; j <= nrtilesy; j++) {
|
||||
BEGIN_RING( 4 );
|
||||
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
|
||||
OUT_RING( tileoffset * 128 );
|
||||
/* the number of tiles to clear */
|
||||
OUT_RING( nrtilesx + 4 );
|
||||
/* clear mask : chooses the clearing pattern. */
|
||||
OUT_RING( clearmask );
|
||||
ADVANCE_RING();
|
||||
tileoffset += depthpixperline >> 6;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO don't always clear all hi-level z tiles */
|
||||
if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
|
||||
&& (flags & RADEON_USE_HIERZ))
|
||||
/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
|
||||
/* FIXME : the mask supposedly contains low-res z values. So can't set
|
||||
just to the max (0xff? or actually 0x3fff?), need to take z clear
|
||||
value into account? */
|
||||
{
|
||||
BEGIN_RING( 4 );
|
||||
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
|
||||
OUT_RING( 0x0 ); /* First tile */
|
||||
OUT_RING( 0x3cc0 );
|
||||
OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
|
||||
ADVANCE_RING();
|
||||
}
|
||||
}
|
||||
|
||||
/* We have to clear the depth and/or stencil buffers by
|
||||
* rendering a quad into just those buffers. Thus, we have to
|
||||
* make sure the 3D engine is configured correctly.
|
||||
*/
|
||||
if ((dev_priv->microcode_version == UCODE_R200) && (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
|
||||
else if ((dev_priv->microcode_version == UCODE_R200) &&
|
||||
(flags & (RADEON_DEPTH | RADEON_STENCIL))) {
|
||||
|
||||
int tempPP_CNTL;
|
||||
int tempRE_CNTL;
|
||||
|
|
@ -929,6 +1080,14 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
|
|||
tempRB3D_STENCILREFMASK = 0x00000000;
|
||||
}
|
||||
|
||||
if (flags & RADEON_USE_COMP_ZBUF) {
|
||||
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
|
||||
RADEON_Z_DECOMPRESSION_ENABLE;
|
||||
}
|
||||
if (flags & RADEON_USE_HIERZ) {
|
||||
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
|
||||
}
|
||||
|
||||
BEGIN_RING(26);
|
||||
RADEON_WAIT_UNTIL_2D_IDLE();
|
||||
|
||||
|
|
@ -979,6 +1138,8 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
|
|||
}
|
||||
} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
|
||||
|
||||
int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
|
||||
|
||||
rb3d_cntl = depth_clear->rb3d_cntl;
|
||||
|
||||
if (flags & RADEON_DEPTH) {
|
||||
|
|
@ -995,6 +1156,14 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
|
|||
rb3d_stencilrefmask = 0x00000000;
|
||||
}
|
||||
|
||||
if (flags & RADEON_USE_COMP_ZBUF) {
|
||||
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
|
||||
RADEON_Z_DECOMPRESSION_ENABLE;
|
||||
}
|
||||
if (flags & RADEON_USE_HIERZ) {
|
||||
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
|
||||
}
|
||||
|
||||
BEGIN_RING(13);
|
||||
RADEON_WAIT_UNTIL_2D_IDLE();
|
||||
|
||||
|
|
@ -1002,8 +1171,7 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
|
|||
OUT_RING(0x00000000);
|
||||
OUT_RING(rb3d_cntl);
|
||||
|
||||
OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL,
|
||||
depth_clear->rb3d_zstencilcntl);
|
||||
OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
|
||||
OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
|
||||
OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
|
||||
OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
|
||||
|
|
|
|||
|
|
@ -42,10 +42,10 @@
|
|||
|
||||
#define DRIVER_NAME "radeon"
|
||||
#define DRIVER_DESC "ATI Radeon"
|
||||
#define DRIVER_DATE "20020828"
|
||||
#define DRIVER_DATE "20041207"
|
||||
|
||||
#define DRIVER_MAJOR 1
|
||||
#define DRIVER_MINOR 12
|
||||
#define DRIVER_MINOR 13
|
||||
#define DRIVER_PATCHLEVEL 0
|
||||
|
||||
/* Interface history:
|
||||
|
|
@ -82,6 +82,8 @@
|
|||
* and GL_EXT_blend_[func|equation]_separate on r200
|
||||
* 1.12- Add R300 CP microcode support - this just loads the CP on r300
|
||||
* (No 3D support yet - just microcode loading).
|
||||
* 1.13- Add packet R200_EMIT_TCL_POINT_SPRITE_CNTL for ARB_point_parameters
|
||||
* - Add hyperz support, add hyperz flags to clear ioctl.
|
||||
*/
|
||||
#define DRIVER_IOCTLS \
|
||||
[DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { radeon_cp_buffers, 1, 0 }, \
|
||||
|
|
|
|||
|
|
@ -2017,6 +2017,18 @@ int radeon_preinit( struct drm_device *dev, unsigned long flags )
|
|||
dev->dev_private = (void *)dev_priv;
|
||||
dev_priv->flags = flags;
|
||||
|
||||
switch (flags & CHIP_FAMILY_MASK) {
|
||||
case CHIP_R100:
|
||||
case CHIP_RV200:
|
||||
case CHIP_R200:
|
||||
case CHIP_R300:
|
||||
dev_priv->flags |= CHIP_HAS_HIERZ;
|
||||
break;
|
||||
default:
|
||||
/* all other chips have no hierarchical z buffer */
|
||||
break;
|
||||
}
|
||||
|
||||
/* registers */
|
||||
if( (ret = DRM(initmap)( dev, pci_resource_start( dev->pdev, 2 ),
|
||||
pci_resource_len( dev->pdev, 2 ), _DRM_REGISTERS, 0 )))
|
||||
|
|
|
|||
|
|
@ -145,7 +145,8 @@
|
|||
#define RADEON_EMIT_PP_TEX_SIZE_1 74
|
||||
#define RADEON_EMIT_PP_TEX_SIZE_2 75
|
||||
#define R200_EMIT_RB3D_BLENDCOLOR 76
|
||||
#define RADEON_MAX_STATE_PACKETS 77
|
||||
#define R200_EMIT_TCL_POINT_SPRITE_CNTL 77
|
||||
#define RADEON_MAX_STATE_PACKETS 78
|
||||
|
||||
|
||||
/* Commands understood by cmd_buffer ioctl. More can be added but
|
||||
|
|
@ -193,6 +194,9 @@ typedef union {
|
|||
#define RADEON_BACK 0x2
|
||||
#define RADEON_DEPTH 0x4
|
||||
#define RADEON_STENCIL 0x8
|
||||
#define RADEON_CLEAR_FASTZ 0x80000000
|
||||
#define RADEON_USE_HIERZ 0x40000000
|
||||
#define RADEON_USE_COMP_ZBUF 0x20000000
|
||||
|
||||
/* Primitive types
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ enum radeon_chip_flags {
|
|||
CHIP_IS_IGP = 0x00020000UL,
|
||||
CHIP_SINGLE_CRTC = 0x00040000UL,
|
||||
CHIP_IS_AGP = 0x00080000UL,
|
||||
CHIP_HAS_HIERZ = 0x00100000UL,
|
||||
};
|
||||
|
||||
#define GET_RING_HEAD(dev_priv) DRM_READ32( (dev_priv)->ring_rptr, 0 )
|
||||
|
|
@ -411,6 +412,7 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
|
|||
# define RADEON_STENCIL_ENABLE (1 << 7)
|
||||
# define RADEON_Z_ENABLE (1 << 8)
|
||||
#define RADEON_RB3D_DEPTHOFFSET 0x1c24
|
||||
#define RADEON_RB3D_DEPTHCLEARVALUE 0x3230
|
||||
#define RADEON_RB3D_DEPTHPITCH 0x1c28
|
||||
#define RADEON_RB3D_PLANEMASK 0x1d84
|
||||
#define RADEON_RB3D_STENCILREFMASK 0x1d7c
|
||||
|
|
@ -423,11 +425,15 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
|
|||
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
|
||||
# define RADEON_Z_TEST_MASK (7 << 4)
|
||||
# define RADEON_Z_TEST_ALWAYS (7 << 4)
|
||||
# define RADEON_Z_HIERARCHY_ENABLE (1 << 8)
|
||||
# define RADEON_STENCIL_TEST_ALWAYS (7 << 12)
|
||||
# define RADEON_STENCIL_S_FAIL_REPLACE (2 << 16)
|
||||
# define RADEON_STENCIL_ZPASS_REPLACE (2 << 20)
|
||||
# define RADEON_STENCIL_ZFAIL_REPLACE (2 << 24)
|
||||
# define RADEON_Z_COMPRESSION_ENABLE (1 << 28)
|
||||
# define RADEON_FORCE_Z_DIRTY (1 << 29)
|
||||
# define RADEON_Z_WRITE_ENABLE (1 << 30)
|
||||
# define RADEON_Z_DECOMPRESSION_ENABLE (1 << 31)
|
||||
#define RADEON_RBBM_SOFT_RESET 0x00f0
|
||||
# define RADEON_SOFT_RESET_CP (1 << 0)
|
||||
# define RADEON_SOFT_RESET_HI (1 << 1)
|
||||
|
|
@ -535,7 +541,7 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
|
|||
# define RADEON_WAIT_3D_IDLECLEAN (1 << 17)
|
||||
# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18)
|
||||
|
||||
#define RADEON_RB3D_ZMASKOFFSET 0x1c34
|
||||
#define RADEON_RB3D_ZMASKOFFSET 0x3234
|
||||
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
|
||||
# define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
|
||||
# define RADEON_DEPTH_FORMAT_24BIT_INT_Z (2 << 0)
|
||||
|
|
@ -590,6 +596,8 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
|
|||
# define RADEON_3D_DRAW_IMMD 0x00002900
|
||||
# define RADEON_3D_DRAW_INDX 0x00002A00
|
||||
# define RADEON_3D_LOAD_VBPNTR 0x00002F00
|
||||
# define RADEON_3D_CLEAR_ZMASK 0x00003200
|
||||
# define RADEON_3D_CLEAR_HIZ 0x00003700
|
||||
# define RADEON_CNTL_HOSTDATA_BLT 0x00009400
|
||||
# define RADEON_CNTL_PAINT_MULTI 0x00009A00
|
||||
# define RADEON_CNTL_BITBLT_MULTI 0x00009B00
|
||||
|
|
@ -748,6 +756,8 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );
|
|||
|
||||
#define R200_RB3D_BLENDCOLOR 0x3218
|
||||
|
||||
#define R200_SE_TCL_POINT_SPRITE_CNTL 0x22c4
|
||||
|
||||
/* Constants */
|
||||
#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */
|
||||
|
||||
|
|
|
|||
|
|
@ -205,6 +205,7 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_
|
|||
case RADEON_EMIT_PP_TEX_SIZE_1:
|
||||
case RADEON_EMIT_PP_TEX_SIZE_2:
|
||||
case R200_EMIT_RB3D_BLENDCOLOR:
|
||||
case R200_EMIT_TCL_POINT_SPRITE_CNTL:
|
||||
/* These packets don't contain memory offsets */
|
||||
break;
|
||||
|
||||
|
|
@ -569,6 +570,7 @@ static struct {
|
|||
{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
|
||||
{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
|
||||
{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
|
||||
{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -780,12 +782,159 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
|
|||
}
|
||||
}
|
||||
|
||||
/* hyper z clear */
|
||||
/* no docs available, based on reverse engineering by Stephane Marchesin */
|
||||
if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {
|
||||
|
||||
int i;
|
||||
int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?
|
||||
(dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
|
||||
|
||||
u32 clearmask;
|
||||
|
||||
u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
|
||||
((clear->depth_mask & 0xff) << 24);
|
||||
|
||||
|
||||
/* Make sure we restore the 3D state next time.
|
||||
* we haven't touched any "normal" state - still need this?
|
||||
*/
|
||||
dev_priv->sarea_priv->ctx_owner = 0;
|
||||
|
||||
if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
|
||||
/* FIXME : reverse engineer that for Rx00 cards */
|
||||
/* FIXME : the mask supposedly contains low-res z values. So can't set
|
||||
just to the max (0xff? or actually 0x3fff?), need to take z clear
|
||||
value into account? */
|
||||
/* pattern seems to work for r100, though get slight
|
||||
rendering errors with glxgears. If hierz is not enabled for r100,
|
||||
only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
|
||||
other ones are ignored, and the same clear mask can be used. That's
|
||||
very different behaviour than R200 which needs different clear mask
|
||||
and different number of tiles to clear if hierz is enabled or not !?!
|
||||
*/
|
||||
clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
|
||||
}
|
||||
else {
|
||||
/* clear mask : chooses the clearing pattern.
|
||||
rv250: could be used to clear only parts of macrotiles
|
||||
(but that would get really complicated...)?
|
||||
bit 0 and 1 (either or both of them ?!?!) are used to
|
||||
not clear tile (or maybe one of the bits indicates if the tile is
|
||||
compressed or not), bit 2 and 3 to not clear tile 1,...,.
|
||||
Pattern is as follows:
|
||||
| 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
|
||||
bits -------------------------------------------------
|
||||
| 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
|
||||
rv100: clearmask covers 2x8 4x1 tiles, but one clear still
|
||||
covers 256 pixels ?!?
|
||||
*/
|
||||
clearmask = 0x0;
|
||||
}
|
||||
|
||||
BEGIN_RING( 8 );
|
||||
RADEON_WAIT_UNTIL_2D_IDLE();
|
||||
OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
|
||||
tempRB3D_DEPTHCLEARVALUE);
|
||||
/* what offset is this exactly ? */
|
||||
OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
|
||||
/* need ctlstat, otherwise get some strange black flickering */
|
||||
OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
|
||||
ADVANCE_RING();
|
||||
|
||||
for (i = 0; i < nbox; i++) {
|
||||
int tileoffset, nrtilesx, nrtilesy, j;
|
||||
/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
|
||||
if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
|
||||
/* FIXME : figure this out for r200 (when hierz is enabled). Or
|
||||
maybe r200 actually doesn't need to put the low-res z value into
|
||||
the tile cache like r100, but just needs to clear the hi-level z-buffer?
|
||||
Works for R100, both with hierz and without.
|
||||
R100 seems to operate on 2x1 8x8 tiles, but...
|
||||
odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
|
||||
problematic with resolutions which are not 64 pix aligned? */
|
||||
tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
|
||||
nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
|
||||
nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
|
||||
for (j = 0; j <= nrtilesy; j++) {
|
||||
BEGIN_RING( 4 );
|
||||
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
|
||||
/* first tile */
|
||||
OUT_RING( tileoffset * 8 );
|
||||
/* the number of tiles to clear */
|
||||
OUT_RING( nrtilesx + 4 );
|
||||
/* clear mask : chooses the clearing pattern. */
|
||||
OUT_RING( clearmask );
|
||||
ADVANCE_RING();
|
||||
tileoffset += depthpixperline >> 6;
|
||||
}
|
||||
}
|
||||
else if (dev_priv->microcode_version==UCODE_R200) {
|
||||
/* works for rv250. */
|
||||
/* find first macro tile (8x2 4x4 z-pixels on rv250) */
|
||||
tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
|
||||
nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
|
||||
nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
|
||||
for (j = 0; j <= nrtilesy; j++) {
|
||||
BEGIN_RING( 4 );
|
||||
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
|
||||
/* first tile */
|
||||
/* judging by the first tile offset needed, could possibly
|
||||
directly address/clear 4x4 tiles instead of 8x2 * 4x4
|
||||
macro tiles, though would still need clear mask for
|
||||
right/bottom if truly 4x4 granularity is desired ? */
|
||||
OUT_RING( tileoffset * 16 );
|
||||
/* the number of tiles to clear */
|
||||
OUT_RING( nrtilesx + 1 );
|
||||
/* clear mask : chooses the clearing pattern. */
|
||||
OUT_RING( clearmask );
|
||||
ADVANCE_RING();
|
||||
tileoffset += depthpixperline >> 5;
|
||||
}
|
||||
}
|
||||
else { /* rv 100 */
|
||||
/* rv100 might not need 64 pix alignment, who knows */
|
||||
/* offsets are, hmm, weird */
|
||||
tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
|
||||
nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
|
||||
nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
|
||||
for (j = 0; j <= nrtilesy; j++) {
|
||||
BEGIN_RING( 4 );
|
||||
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
|
||||
OUT_RING( tileoffset * 128 );
|
||||
/* the number of tiles to clear */
|
||||
OUT_RING( nrtilesx + 4 );
|
||||
/* clear mask : chooses the clearing pattern. */
|
||||
OUT_RING( clearmask );
|
||||
ADVANCE_RING();
|
||||
tileoffset += depthpixperline >> 6;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO don't always clear all hi-level z tiles */
|
||||
if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
|
||||
&& (flags & RADEON_USE_HIERZ))
|
||||
/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
|
||||
/* FIXME : the mask supposedly contains low-res z values. So can't set
|
||||
just to the max (0xff? or actually 0x3fff?), need to take z clear
|
||||
value into account? */
|
||||
{
|
||||
BEGIN_RING( 4 );
|
||||
OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
|
||||
OUT_RING( 0x0 ); /* First tile */
|
||||
OUT_RING( 0x3cc0 );
|
||||
OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
|
||||
ADVANCE_RING();
|
||||
}
|
||||
}
|
||||
|
||||
/* We have to clear the depth and/or stencil buffers by
|
||||
* rendering a quad into just those buffers. Thus, we have to
|
||||
* make sure the 3D engine is configured correctly.
|
||||
*/
|
||||
if ( (dev_priv->microcode_version==UCODE_R200) &&
|
||||
(flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
|
||||
else if ((dev_priv->microcode_version == UCODE_R200) &&
|
||||
(flags & (RADEON_DEPTH | RADEON_STENCIL))) {
|
||||
|
||||
int tempPP_CNTL;
|
||||
int tempRE_CNTL;
|
||||
|
|
@ -855,6 +1004,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
|
|||
tempRB3D_STENCILREFMASK = 0x00000000;
|
||||
}
|
||||
|
||||
if (flags & RADEON_USE_COMP_ZBUF) {
|
||||
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
|
||||
RADEON_Z_DECOMPRESSION_ENABLE;
|
||||
}
|
||||
if (flags & RADEON_USE_HIERZ) {
|
||||
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
|
||||
}
|
||||
|
||||
BEGIN_RING( 26 );
|
||||
RADEON_WAIT_UNTIL_2D_IDLE();
|
||||
|
||||
|
|
@ -909,6 +1066,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
|
|||
}
|
||||
else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
|
||||
|
||||
int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
|
||||
|
||||
rb3d_cntl = depth_clear->rb3d_cntl;
|
||||
|
||||
if ( flags & RADEON_DEPTH ) {
|
||||
|
|
@ -925,6 +1084,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
|
|||
rb3d_stencilrefmask = 0x00000000;
|
||||
}
|
||||
|
||||
if (flags & RADEON_USE_COMP_ZBUF) {
|
||||
tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
|
||||
RADEON_Z_DECOMPRESSION_ENABLE;
|
||||
}
|
||||
if (flags & RADEON_USE_HIERZ) {
|
||||
tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
|
||||
}
|
||||
|
||||
BEGIN_RING( 13 );
|
||||
RADEON_WAIT_UNTIL_2D_IDLE();
|
||||
|
||||
|
|
@ -933,7 +1100,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
|
|||
OUT_RING( rb3d_cntl );
|
||||
|
||||
OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
|
||||
depth_clear->rb3d_zstencilcntl );
|
||||
tempRB3D_ZSTENCILCNTL );
|
||||
OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
|
||||
rb3d_stencilrefmask );
|
||||
OUT_RING_REG( RADEON_RB3D_PLANEMASK,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue