First attempt at utah-style performance boxes. Uses Gareth's original

drawing framework but adds a bunch of new counted events.
This commit is contained in:
Keith Whitwell 2002-07-18 01:33:03 +00:00
parent c1869a9cb1
commit ea9f3a043e
6 changed files with 238 additions and 132 deletions

View file

@ -994,12 +994,9 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init )
}
dev_priv->is_r200 = (init->func == RADEON_INIT_R200_CP);
dev_priv->do_boxes = 1;
dev_priv->cp_mode = init->cp_mode;
/* Simple idle check.
*/
atomic_set( &dev_priv->idle_count, 0 );
/* We don't support anything other than bus-mastering ring mode,
* but the ring can be in either AGP or PCI space for the ring
* read pointer.
@ -1438,14 +1435,20 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev )
drm_buf_t *buf;
int i, t;
int start;
u32 done_age = DRM_READ32(&dev_priv->scratch[1]);
dev_priv->stats.requested_bufs++;
if (done_age == dev_priv->sarea_priv->last_dispatch)
dev_priv->stats.boxes |= RADEON_BOX_DMA_IDLE;
if ( ++dev_priv->last_buf >= dma->buf_count )
dev_priv->last_buf = 0;
start = dev_priv->last_buf;
/* printk("start %d count %d\n", start, dma->buf_count); */
for ( t = 0 ; t < dev_priv->usec_timeout ; t++ ) {
u32 done_age = DRM_READ32(&dev_priv->scratch[1]);
DRM_DEBUG("done_age = %d\n",done_age);
for ( i = start ; i < dma->buf_count ; i++ ) {
buf = dma->buflist[i];
@ -1453,13 +1456,17 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev )
if ( buf->pid == 0 || (buf->pending &&
buf_priv->age <= done_age) ) {
buf->pending = 0;
/* if (t) printk("wait: %d\n", t); */
return buf;
}
start = 0;
}
DRM_UDELAY( 1 );
dev_priv->stats.freelist_loops++;
if (t) DRM_UDELAY( 1 );
done_age = DRM_READ32(&dev_priv->scratch[1]);
}
dev_priv->stats.freelist_timeouts++;
DRM_ERROR( "returning NULL!\n" );
return NULL;
}
@ -1487,11 +1494,24 @@ int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n )
{
drm_radeon_ring_buffer_t *ring = &dev_priv->ring;
int i;
u32 last_head = GET_RING_HEAD(ring);
for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) {
radeon_update_ring_snapshot( ring );
u32 head = GET_RING_HEAD(ring);
ring->space = (head - ring->tail) * sizeof(u32);
if ( ring->space <= 0 )
ring->space += ring->size;
if ( ring->space > n )
return 0;
dev_priv->stats.boxes |= RADEON_BOX_RING_FULL;
if (head != last_head)
i = 0;
last_head = head;
DRM_UDELAY( 1 );
}

View file

@ -85,8 +85,16 @@ typedef struct drm_radeon_private {
unsigned long phys_pci_gart;
dma_addr_t bus_pci_gart;
atomic_t idle_count;
struct {
u32 boxes;
int freelist_timeouts;
int freelist_loops;
int requested_bufs;
int last_frame_reads;
int last_clear_reads;
} stats;
int do_boxes;
int page_flipping;
int current_page;
u32 crtc_offset;
@ -136,14 +144,6 @@ extern drm_buf_t *radeon_freelist_get( drm_device_t *dev );
extern int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n );
/* Refresh the cached free-space value stored in ring->space.
 *
 * The value is the difference between the current ring head (as reported
 * by GET_RING_HEAD) and ring->tail, multiplied by sizeof(u32).  When that
 * result is zero or negative, ring->size is added to it.
 *
 * NOTE(review): presumably head/tail are dword indices and space/size are
 * byte counts, so the adjustment handles the head having wrapped behind
 * the tail -- confirm against the ring buffer definition.
 */
static __inline__ void
radeon_update_ring_snapshot( drm_radeon_ring_buffer_t *ring )
{
ring->space = (GET_RING_HEAD(ring) - ring->tail) * sizeof(u32);
if ( ring->space <= 0 )
ring->space += ring->size;
}
extern int radeon_do_cp_idle( drm_radeon_private_t *dev_priv );
extern int radeon_do_cleanup_cp( drm_device_t *dev );
extern int radeon_do_cleanup_pageflip( drm_device_t *dev );
@ -161,6 +161,12 @@ extern int radeon_cp_cmdbuf( DRM_IOCTL_ARGS );
extern int radeon_cp_getparam( DRM_IOCTL_ARGS );
extern int radeon_cp_flip( DRM_IOCTL_ARGS );
/* Flags for stats.boxes
*/
#define RADEON_BOX_DMA_IDLE 0x1
#define RADEON_BOX_RING_FULL 0x2
#define RADEON_BOX_FLIP 0x4
/* Register definitions, register access macros and drmAddMap constants
@ -677,28 +683,6 @@ do { \
/* Make sure ring->space has reached ring->high_mark before continuing.
 *
 * If the cached space is already >= high_mark, nothing happens.  Otherwise
 * the macro polls up to dev_priv->usec_timeout times, refreshing the
 * snapshot with radeon_update_ring_snapshot() and calling DRM_UDELAY( 1 )
 * between attempts.  If that loop expires, it logs an error and recomputes
 * the space once from the RADEON_CP_RB_RPTR register; if there is still
 * not enough room it logs a second error and executes
 * "return DRM_ERR(EBUSY);" in the *enclosing* function.
 *
 * Usage constraints that follow from the expansion:
 *  - it contains a return statement, so it can only be used inside a
 *    function whose return type accepts DRM_ERR(EBUSY);
 *  - it defines the label __ring_space_done, and labels have function
 *    scope, so it can be expanded at most once per function;
 *  - the argument is pasted unparenthesized ("&dev_priv->ring"), so pass
 *    a plain pointer name rather than a compound expression;
 *  - it declares block-local variables named "ring" and "i".
 */
#define RING_SPACE_TEST_WITH_RETURN( dev_priv ) \
do { \
drm_radeon_ring_buffer_t *ring = &dev_priv->ring; int i; \
if ( ring->space < ring->high_mark ) { \
for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { \
radeon_update_ring_snapshot( ring ); \
if ( ring->space >= ring->high_mark ) \
goto __ring_space_done; \
DRM_UDELAY( 1 ); \
} \
DRM_ERROR( "ring space check from memory failed, reading register...\n" ); \
/* If ring space check fails from RAM, try reading the \
register directly */ \
ring->space = 4 * ( RADEON_READ( RADEON_CP_RB_RPTR ) - ring->tail ); \
if ( ring->space <= 0 ) \
ring->space += ring->size; \
if ( ring->space >= ring->high_mark ) \
goto __ring_space_done; \
\
DRM_ERROR( "ring space check failed!\n" ); \
return DRM_ERR(EBUSY); \
} \
__ring_space_done: \
; \
} while (0)
#define VB_AGE_TEST_WITH_RETURN( dev_priv ) \
@ -814,6 +798,4 @@ do { \
} while (0)
#define RADEON_PERFORMANCE_BOXES 0
#endif /* __RADEON_DRV_H__ */

View file

@ -284,14 +284,6 @@ static struct {
#if RADEON_PERFORMANCE_BOXES
/* ================================================================
* Performance monitoring functions
*/
@ -300,10 +292,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
int x, int y, int w, int h,
int r, int g, int b )
{
u32 pitch, offset;
u32 color;
RING_LOCALS;
x += dev_priv->sarea_priv->boxes[0].x1;
y += dev_priv->sarea_priv->boxes[0].y1;
switch ( dev_priv->color_fmt ) {
case RADEON_COLOR_FORMAT_RGB565:
color = (((r & 0xf8) << 8) |
@ -316,8 +310,11 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
break;
}
offset = dev_priv->back_offset;
pitch = dev_priv->back_pitch >> 3;
BEGIN_RING( 4 );
RADEON_WAIT_UNTIL_3D_IDLE();
OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
OUT_RING( 0xffffffff );
ADVANCE_RING();
BEGIN_RING( 6 );
@ -329,7 +326,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
RADEON_ROP3_P |
RADEON_GMC_CLR_CMP_CNTL_DIS );
OUT_RING( (pitch << 22) | (offset >> 5) );
if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
OUT_RING( dev_priv->front_pitch_offset );
} else {
OUT_RING( dev_priv->back_pitch_offset );
}
OUT_RING( color );
OUT_RING( (x << 16) | y );
@ -340,16 +342,61 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
{
if ( atomic_read( &dev_priv->idle_count ) == 0 ) {
radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
} else {
atomic_set( &dev_priv->idle_count, 0 );
if (dev_priv->stats.requested_bufs) {
int i = dev_priv->stats.freelist_loops /
dev_priv->stats.requested_bufs;
if (dev_priv->stats.requested_bufs > 100)
dev_priv->stats.requested_bufs = 100;
radeon_clear_box( dev_priv, 16, 4, 8,
dev_priv->stats.requested_bufs,
255, 255, 255 );
if (i)
radeon_clear_box( dev_priv, 32, 4, 8, 8, 0, 64, 0 );
if (i > 10)
radeon_clear_box( dev_priv, 32, 13, 8, 8, 0, 128, 0 );
if (i > 100)
radeon_clear_box( dev_priv, 32, 22, 8, 8, 0, 255, 0 );
}
if ( dev_priv->stats.freelist_timeouts )
radeon_clear_box( dev_priv, 48, 4, 8, 8, 255, 0, 0 );
if ( dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE )
radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
if ( dev_priv->stats.boxes & RADEON_BOX_RING_FULL )
radeon_clear_box( dev_priv, 80, 4, 8, 8, 0, 0, 255 );
if ( dev_priv->stats.boxes & RADEON_BOX_FLIP )
radeon_clear_box( dev_priv, 96, 4, 8, 8, 255, 255, 0 );
if (dev_priv->stats.last_frame_reads > 1)
radeon_clear_box( dev_priv, 112, 4, 8, 8, 0, 64, 32 );
if (dev_priv->stats.last_frame_reads > 100)
radeon_clear_box( dev_priv, 112, 13, 8, 8, 0, 128, 64 );
if (dev_priv->stats.last_frame_reads > 1000)
radeon_clear_box( dev_priv, 112, 22, 8, 8, 0, 255, 128 );
if (dev_priv->stats.last_clear_reads > 1)
radeon_clear_box( dev_priv, 128, 4, 8, 8, 32, 64, 0 );
if (dev_priv->stats.last_clear_reads > 100)
radeon_clear_box( dev_priv, 128, 13, 8, 8, 64, 128, 0 );
if (dev_priv->stats.last_clear_reads > 1000)
radeon_clear_box( dev_priv, 128, 22, 8, 8, 128, 255, 0 );
memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
}
#endif
/* ================================================================
* CP command dispatch functions
*/
@ -675,11 +722,12 @@ static void radeon_cp_dispatch_swap( drm_device_t *dev )
RING_LOCALS;
DRM_DEBUG( "\n" );
#if RADEON_PERFORMANCE_BOXES
/* Do some trivial performance monitoring...
*/
radeon_cp_performance_boxes( dev_priv );
#endif
if (dev_priv->do_boxes)
radeon_cp_performance_boxes( dev_priv );
/* Wait for the 3D stream to idle before dispatching the bitblt.
* This will prevent data corruption between the two streams.
@ -753,11 +801,12 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev )
dev_priv->current_page,
dev_priv->sarea_priv->pfCurrentPage);
#if RADEON_PERFORMANCE_BOXES
/* Do some trivial performance monitoring...
*/
radeon_cp_performance_boxes( dev_priv );
#endif
if (dev_priv->do_boxes) {
dev_priv->stats.boxes |= RADEON_BOX_FLIP;
radeon_cp_performance_boxes( dev_priv );
}
BEGIN_RING( 4 );
@ -2053,12 +2102,14 @@ int radeon_cp_getparam( DRM_IOCTL_ARGS )
value = dev_priv->agp_buffers_offset;
break;
case RADEON_PARAM_LAST_FRAME:
dev_priv->stats.last_frame_reads++;
value = DRM_READ32(&dev_priv->scratch[0]);
break;
case RADEON_PARAM_LAST_DISPATCH:
value = DRM_READ32(&dev_priv->scratch[1]);
break;
case RADEON_PARAM_LAST_CLEAR:
dev_priv->stats.last_clear_reads++;
value = DRM_READ32(&dev_priv->scratch[2]);
break;
default:

View file

@ -994,12 +994,9 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init )
}
dev_priv->is_r200 = (init->func == RADEON_INIT_R200_CP);
dev_priv->do_boxes = 1;
dev_priv->cp_mode = init->cp_mode;
/* Simple idle check.
*/
atomic_set( &dev_priv->idle_count, 0 );
/* We don't support anything other than bus-mastering ring mode,
* but the ring can be in either AGP or PCI space for the ring
* read pointer.
@ -1438,14 +1435,20 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev )
drm_buf_t *buf;
int i, t;
int start;
u32 done_age = DRM_READ32(&dev_priv->scratch[1]);
dev_priv->stats.requested_bufs++;
if (done_age == dev_priv->sarea_priv->last_dispatch)
dev_priv->stats.boxes |= RADEON_BOX_DMA_IDLE;
if ( ++dev_priv->last_buf >= dma->buf_count )
dev_priv->last_buf = 0;
start = dev_priv->last_buf;
/* printk("start %d count %d\n", start, dma->buf_count); */
for ( t = 0 ; t < dev_priv->usec_timeout ; t++ ) {
u32 done_age = DRM_READ32(&dev_priv->scratch[1]);
DRM_DEBUG("done_age = %d\n",done_age);
for ( i = start ; i < dma->buf_count ; i++ ) {
buf = dma->buflist[i];
@ -1453,13 +1456,17 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev )
if ( buf->pid == 0 || (buf->pending &&
buf_priv->age <= done_age) ) {
buf->pending = 0;
/* if (t) printk("wait: %d\n", t); */
return buf;
}
start = 0;
}
DRM_UDELAY( 1 );
dev_priv->stats.freelist_loops++;
if (t) DRM_UDELAY( 1 );
done_age = DRM_READ32(&dev_priv->scratch[1]);
}
dev_priv->stats.freelist_timeouts++;
DRM_ERROR( "returning NULL!\n" );
return NULL;
}
@ -1487,11 +1494,24 @@ int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n )
{
drm_radeon_ring_buffer_t *ring = &dev_priv->ring;
int i;
u32 last_head = GET_RING_HEAD(ring);
for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) {
radeon_update_ring_snapshot( ring );
u32 head = GET_RING_HEAD(ring);
ring->space = (head - ring->tail) * sizeof(u32);
if ( ring->space <= 0 )
ring->space += ring->size;
if ( ring->space > n )
return 0;
dev_priv->stats.boxes |= RADEON_BOX_RING_FULL;
if (head != last_head)
i = 0;
last_head = head;
DRM_UDELAY( 1 );
}

View file

@ -85,8 +85,16 @@ typedef struct drm_radeon_private {
unsigned long phys_pci_gart;
dma_addr_t bus_pci_gart;
atomic_t idle_count;
struct {
u32 boxes;
int freelist_timeouts;
int freelist_loops;
int requested_bufs;
int last_frame_reads;
int last_clear_reads;
} stats;
int do_boxes;
int page_flipping;
int current_page;
u32 crtc_offset;
@ -136,14 +144,6 @@ extern drm_buf_t *radeon_freelist_get( drm_device_t *dev );
extern int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n );
/* Refresh the cached free-space value stored in ring->space.
 *
 * The value is the difference between the current ring head (as reported
 * by GET_RING_HEAD) and ring->tail, multiplied by sizeof(u32).  When that
 * result is zero or negative, ring->size is added to it.
 *
 * NOTE(review): presumably head/tail are dword indices and space/size are
 * byte counts, so the adjustment handles the head having wrapped behind
 * the tail -- confirm against the ring buffer definition.
 */
static __inline__ void
radeon_update_ring_snapshot( drm_radeon_ring_buffer_t *ring )
{
ring->space = (GET_RING_HEAD(ring) - ring->tail) * sizeof(u32);
if ( ring->space <= 0 )
ring->space += ring->size;
}
extern int radeon_do_cp_idle( drm_radeon_private_t *dev_priv );
extern int radeon_do_cleanup_cp( drm_device_t *dev );
extern int radeon_do_cleanup_pageflip( drm_device_t *dev );
@ -161,6 +161,12 @@ extern int radeon_cp_cmdbuf( DRM_IOCTL_ARGS );
extern int radeon_cp_getparam( DRM_IOCTL_ARGS );
extern int radeon_cp_flip( DRM_IOCTL_ARGS );
/* Flags for stats.boxes
*/
#define RADEON_BOX_DMA_IDLE 0x1
#define RADEON_BOX_RING_FULL 0x2
#define RADEON_BOX_FLIP 0x4
/* Register definitions, register access macros and drmAddMap constants
@ -677,28 +683,6 @@ do { \
/* Make sure ring->space has reached ring->high_mark before continuing.
 *
 * If the cached space is already >= high_mark, nothing happens.  Otherwise
 * the macro polls up to dev_priv->usec_timeout times, refreshing the
 * snapshot with radeon_update_ring_snapshot() and calling DRM_UDELAY( 1 )
 * between attempts.  If that loop expires, it logs an error and recomputes
 * the space once from the RADEON_CP_RB_RPTR register; if there is still
 * not enough room it logs a second error and executes
 * "return DRM_ERR(EBUSY);" in the *enclosing* function.
 *
 * Usage constraints that follow from the expansion:
 *  - it contains a return statement, so it can only be used inside a
 *    function whose return type accepts DRM_ERR(EBUSY);
 *  - it defines the label __ring_space_done, and labels have function
 *    scope, so it can be expanded at most once per function;
 *  - the argument is pasted unparenthesized ("&dev_priv->ring"), so pass
 *    a plain pointer name rather than a compound expression;
 *  - it declares block-local variables named "ring" and "i".
 */
#define RING_SPACE_TEST_WITH_RETURN( dev_priv ) \
do { \
drm_radeon_ring_buffer_t *ring = &dev_priv->ring; int i; \
if ( ring->space < ring->high_mark ) { \
for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { \
radeon_update_ring_snapshot( ring ); \
if ( ring->space >= ring->high_mark ) \
goto __ring_space_done; \
DRM_UDELAY( 1 ); \
} \
DRM_ERROR( "ring space check from memory failed, reading register...\n" ); \
/* If ring space check fails from RAM, try reading the \
register directly */ \
ring->space = 4 * ( RADEON_READ( RADEON_CP_RB_RPTR ) - ring->tail ); \
if ( ring->space <= 0 ) \
ring->space += ring->size; \
if ( ring->space >= ring->high_mark ) \
goto __ring_space_done; \
\
DRM_ERROR( "ring space check failed!\n" ); \
return DRM_ERR(EBUSY); \
} \
__ring_space_done: \
; \
} while (0)
#define VB_AGE_TEST_WITH_RETURN( dev_priv ) \
@ -814,6 +798,4 @@ do { \
} while (0)
#define RADEON_PERFORMANCE_BOXES 0
#endif /* __RADEON_DRV_H__ */

View file

@ -284,14 +284,6 @@ static struct {
#if RADEON_PERFORMANCE_BOXES
/* ================================================================
* Performance monitoring functions
*/
@ -300,10 +292,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
int x, int y, int w, int h,
int r, int g, int b )
{
u32 pitch, offset;
u32 color;
RING_LOCALS;
x += dev_priv->sarea_priv->boxes[0].x1;
y += dev_priv->sarea_priv->boxes[0].y1;
switch ( dev_priv->color_fmt ) {
case RADEON_COLOR_FORMAT_RGB565:
color = (((r & 0xf8) << 8) |
@ -316,8 +310,11 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
break;
}
offset = dev_priv->back_offset;
pitch = dev_priv->back_pitch >> 3;
BEGIN_RING( 4 );
RADEON_WAIT_UNTIL_3D_IDLE();
OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
OUT_RING( 0xffffffff );
ADVANCE_RING();
BEGIN_RING( 6 );
@ -329,7 +326,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
RADEON_ROP3_P |
RADEON_GMC_CLR_CMP_CNTL_DIS );
OUT_RING( (pitch << 22) | (offset >> 5) );
if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
OUT_RING( dev_priv->front_pitch_offset );
} else {
OUT_RING( dev_priv->back_pitch_offset );
}
OUT_RING( color );
OUT_RING( (x << 16) | y );
@ -340,16 +342,61 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
{
if ( atomic_read( &dev_priv->idle_count ) == 0 ) {
radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
} else {
atomic_set( &dev_priv->idle_count, 0 );
if (dev_priv->stats.requested_bufs) {
int i = dev_priv->stats.freelist_loops /
dev_priv->stats.requested_bufs;
if (dev_priv->stats.requested_bufs > 100)
dev_priv->stats.requested_bufs = 100;
radeon_clear_box( dev_priv, 16, 4, 8,
dev_priv->stats.requested_bufs,
255, 255, 255 );
if (i)
radeon_clear_box( dev_priv, 32, 4, 8, 8, 0, 64, 0 );
if (i > 10)
radeon_clear_box( dev_priv, 32, 13, 8, 8, 0, 128, 0 );
if (i > 100)
radeon_clear_box( dev_priv, 32, 22, 8, 8, 0, 255, 0 );
}
if ( dev_priv->stats.freelist_timeouts )
radeon_clear_box( dev_priv, 48, 4, 8, 8, 255, 0, 0 );
if ( dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE )
radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
if ( dev_priv->stats.boxes & RADEON_BOX_RING_FULL )
radeon_clear_box( dev_priv, 80, 4, 8, 8, 0, 0, 255 );
if ( dev_priv->stats.boxes & RADEON_BOX_FLIP )
radeon_clear_box( dev_priv, 96, 4, 8, 8, 255, 255, 0 );
if (dev_priv->stats.last_frame_reads > 1)
radeon_clear_box( dev_priv, 112, 4, 8, 8, 0, 64, 32 );
if (dev_priv->stats.last_frame_reads > 100)
radeon_clear_box( dev_priv, 112, 13, 8, 8, 0, 128, 64 );
if (dev_priv->stats.last_frame_reads > 1000)
radeon_clear_box( dev_priv, 112, 22, 8, 8, 0, 255, 128 );
if (dev_priv->stats.last_clear_reads > 1)
radeon_clear_box( dev_priv, 128, 4, 8, 8, 32, 64, 0 );
if (dev_priv->stats.last_clear_reads > 100)
radeon_clear_box( dev_priv, 128, 13, 8, 8, 64, 128, 0 );
if (dev_priv->stats.last_clear_reads > 1000)
radeon_clear_box( dev_priv, 128, 22, 8, 8, 128, 255, 0 );
memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
}
#endif
/* ================================================================
* CP command dispatch functions
*/
@ -675,11 +722,12 @@ static void radeon_cp_dispatch_swap( drm_device_t *dev )
RING_LOCALS;
DRM_DEBUG( "\n" );
#if RADEON_PERFORMANCE_BOXES
/* Do some trivial performance monitoring...
*/
radeon_cp_performance_boxes( dev_priv );
#endif
if (dev_priv->do_boxes)
radeon_cp_performance_boxes( dev_priv );
/* Wait for the 3D stream to idle before dispatching the bitblt.
* This will prevent data corruption between the two streams.
@ -753,11 +801,12 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev )
dev_priv->current_page,
dev_priv->sarea_priv->pfCurrentPage);
#if RADEON_PERFORMANCE_BOXES
/* Do some trivial performance monitoring...
*/
radeon_cp_performance_boxes( dev_priv );
#endif
if (dev_priv->do_boxes) {
dev_priv->stats.boxes |= RADEON_BOX_FLIP;
radeon_cp_performance_boxes( dev_priv );
}
BEGIN_RING( 4 );
@ -2053,12 +2102,14 @@ int radeon_cp_getparam( DRM_IOCTL_ARGS )
value = dev_priv->agp_buffers_offset;
break;
case RADEON_PARAM_LAST_FRAME:
dev_priv->stats.last_frame_reads++;
value = DRM_READ32(&dev_priv->scratch[0]);
break;
case RADEON_PARAM_LAST_DISPATCH:
value = DRM_READ32(&dev_priv->scratch[1]);
break;
case RADEON_PARAM_LAST_CLEAR:
dev_priv->stats.last_clear_reads++;
value = DRM_READ32(&dev_priv->scratch[2]);
break;
default: