r300g: implement fast color clear

An initial implementation made by Dave Airlie.

For the fast path to be used, the clear must be color-only and exactly one
point-sampled render target must be bound. The render target must be
macrotiled (so that we can overcome alignment issues) and its bpp must be
either 16 or 32.

I can't see a difference in performance. :(

Conflicts:

	src/gallium/drivers/r300/r300_blit.c
This commit is contained in:
Marek Olšák 2010-07-12 13:23:24 +02:00
parent 78e8a8765f
commit 8c836f7f74
10 changed files with 178 additions and 13 deletions

View file

@ -24,6 +24,7 @@
#include "r300_texture.h"
#include "util/u_format.h"
#include "util/u_pack_color.h"
enum r300_blitter_op /* bitmask */
{
@ -79,6 +80,48 @@ static void r300_blitter_end(struct r300_context *r300)
}
}
/* Pack a clear color into the 32-bit value that r300_clear() stores in the
 * HyperZ state as zb_depthclearvalue for a CBZB clear.
 *
 * format: format of the colorbuffer being cleared.
 * rgba:   clear color, handed unchanged to util_pack_color().
 *
 * Returns the packed pixel for formats whose block size is 32 bits;
 * otherwise the packed 16-bit pixel replicated into both halves of the
 * returned dword. */
static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
                                          const float* rgba)
{
    union util_color uc;
    uint32_t pixel16;

    util_pack_color(rgba, format, &uc);

    if (util_format_get_blocksizebits(format) == 32)
        return uc.ui;

    /* Widen before shifting. NOTE(review): uc.us is presumably a 16-bit
     * unsigned field; as such it would be promoted to (signed) int, and
     * shifting a value with bit 15 set left by 16 overflows int, which is
     * undefined behaviour. Doing the shift on a uint32_t is well defined. */
    pixel16 = uc.us;
    return pixel16 | (pixel16 << 16);
}
/* Decide whether a clear may be done through the CBZB path.
 *
 * r300:          context whose currently bound framebuffer state is examined.
 * clear_buffers: the buffer mask passed to the clear.
 *
 * Returns TRUE only if every condition checked below holds. */
static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
                                       unsigned clear_buffers)
{
    struct pipe_framebuffer_state *fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct r300_surface *cb = r300_surface(fb->cbufs[0]);

    /* The request must be a pure color clear... */
    if (clear_buffers != PIPE_CLEAR_COLOR)
        return FALSE;

    /* ...targeting exactly one colorbuffer. */
    if (fb->nr_cbufs != 1)
        return FALSE;

    /* Multisampled colorbuffers are rejected; it must be point-sampled. */
    if (cb->base.texture->nr_samples > 1)
        return FALSE;

    /* ZB can only work with 16-bit and 32-bit pixels. */
    switch (util_format_get_blocksizebits(cb->base.format)) {
    case 16:
    case 32:
        break;
    default:
        return FALSE;
    }

    /* If the midpoint ZB offset is not aligned to 2048, it returns garbage
     * with certain texture sizes. Macrotiling ensures the alignment, so the
     * mip level being cleared has to be macrotiled. */
    return r300_texture(cb->base.texture)->mip_macrotile[cb->base.level]
           ? TRUE : FALSE;
}
/* Clear currently bound buffers. */
static void r300_clear(struct pipe_context* pipe,
unsigned buffers,
@ -124,16 +167,40 @@ static void r300_clear(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct pipe_framebuffer_state *fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
struct r300_hyperz_state *hyperz =
(struct r300_hyperz_state*)r300->hyperz_state.state;
uint32_t width = fb->width;
uint32_t height = fb->height;
/* Enable CBZB clear. */
if (r300_cbzb_clear_allowed(r300, buffers)) {
struct r300_surface *surf = r300_surface(fb->cbufs[0]);
hyperz->zb_depthclearvalue =
r300_depth_clear_cb_value(surf->base.format, rgba);
width = surf->cbzb_width;
height = surf->cbzb_height;
r300->cbzb_clear = TRUE;
r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
}
/* Clear. */
r300_blitter_begin(r300, R300_CLEAR);
util_blitter_clear(r300->blitter,
fb->width,
fb->height,
width,
height,
fb->nr_cbufs,
buffers, rgba, depth, stencil);
r300_blitter_end(r300);
/* Disable CBZB clear. */
if (r300->cbzb_clear) {
r300->cbzb_clear = FALSE;
r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
}
/* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */
if (r300->flush_counter == 0)
pipe->flush(pipe, 0, NULL);

View file

@ -330,7 +330,7 @@ static void r300_init_states(struct pipe_context *pipe)
BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size);
OUT_CB_REG(R300_ZB_BW_CNTL, 0);
OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0);
OUT_CB_REG(R300_SC_HYPERZ, 0x1C);
OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2);
END_CB;
}
}

View file

@ -311,6 +311,13 @@ struct r300_surface {
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
/* Parameters dedicated to the CBZB clear. */
uint32_t cbzb_width; /* Aligned width. */
uint32_t cbzb_height; /* Half of the height. */
uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */
uint32_t cbzb_pitch; /* DEPTHPITCH. */
uint32_t cbzb_format; /* ZB_FORMAT. */
};
struct r300_texture {
@ -525,6 +532,7 @@ struct r300_context {
/* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
boolean incompatible_vb_layout;
boolean cbzb_clear;
/* upload managers */
struct u_upload_mgr *upload_vb;
struct u_upload_mgr *upload_ib;
@ -593,7 +601,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
/* r300_state.c */
enum r300_fb_state_change {
R300_CHANGED_FB_STATE = 0
R300_CHANGED_FB_STATE = 0,
R300_CHANGED_CBZB_FLAG
};
void r300_mark_fb_state_dirty(struct r300_context *r300,

View file

@ -32,6 +32,7 @@
#include "r300_emit.h"
#include "r300_fs.h"
#include "r300_screen.h"
#include "r300_texture.h"
#include "r300_screen_buffer.h"
#include "r300_vs.h"
@ -272,8 +273,17 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
struct pipe_framebuffer_state* fb =
(struct pipe_framebuffer_state*)r300->fb_state.state;
uint32_t height = fb->height;
uint32_t width = fb->width;
CS_LOCALS(r300);
if (r300->cbzb_clear) {
struct r300_surface *surf = r300_surface(fb->cbufs[0]);
height = surf->cbzb_height;
width = surf->cbzb_width;
}
BEGIN_CS(size);
/* Set up scissors.
@ -281,13 +291,13 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
if (r300->screen->caps.is_r500) {
OUT_CS(0);
OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) |
((fb->height - 1) << R300_SCISSORS_Y_SHIFT));
OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) |
((height - 1) << R300_SCISSORS_Y_SHIFT));
} else {
OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
(1440 << R300_SCISSORS_Y_SHIFT));
OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) |
((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT));
OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) |
((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
}
/* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
@ -344,8 +354,20 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
}
/* Set up the ZB part of the CBZB clear. */
if (r300->cbzb_clear) {
surf = r300_surface(fb->cbufs[0]);
OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain, 0);
OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain, 0);
}
/* Set up a zbuffer. */
if (fb->zsbuf) {
else if (fb->zsbuf) {
surf = r300_surface(fb->zsbuf);
OUT_CS_REG(R300_ZB_FORMAT, surf->format);
@ -377,6 +399,18 @@ void r300_emit_hyperz_state(struct r300_context *r300,
WRITE_CS_TABLE(state, size);
}
void r300_emit_hyperz_end(struct r300_context *r300)
{
struct r300_hyperz_state z =
*(struct r300_hyperz_state*)r300->hyperz_state.state;
z.zb_bw_cntl = 0;
z.zb_depthclearvalue = 0;
z.sc_hyperz = R300_SC_HYPERZ_ADJ_2;
r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z);
}
void r300_emit_fb_state_pipelined(struct r300_context *r300,
unsigned size, void *state)
{

View file

@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300,
void r300_emit_dsa_state(struct r300_context* r300,
unsigned size, void* state);
void r300_emit_hyperz_state(struct r300_context *r300,
unsigned size, void *state);
void r300_emit_hyperz_end(struct r300_context *r300);
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
@ -64,9 +69,6 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state);
void r300_emit_hyperz_state(struct r300_context *r300,
unsigned size, void *state);
void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state);
void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);

View file

@ -48,6 +48,7 @@ static void r300_flush(struct pipe_context* pipe,
}
if (r300->dirty_hw) {
r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
r300->flush_counter++;

View file

@ -26,6 +26,22 @@
#include "r300_reg.h"
#include "r300_fs.h"
/*****************************************************************************/
/* The HyperZ setup */
/*****************************************************************************/
/* Recompute the derived fields of the HyperZ state.
 *
 * sc_hyperz is always R300_SC_HYPERZ_ADJ_2. zb_bw_cntl is
 * R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY while a CBZB clear is in progress
 * (r300->cbzb_clear set) and zero otherwise. zb_depthclearvalue is not
 * touched here. */
static void r300_update_hyperz(struct r300_context* r300)
{
    struct r300_hyperz_state *hz =
        (struct r300_hyperz_state*)r300->hyperz_state.state;

    hz->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
    hz->zb_bw_cntl = r300->cbzb_clear
                     ? R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY
                     : 0;
}
/*****************************************************************************/
/* The ZTOP state */
/*****************************************************************************/
@ -118,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300)
/* Refresh the ZTOP state, then refresh the HyperZ state if its atom is
 * marked dirty. */
void r300_update_hyperz_state(struct r300_context* r300)
{
    r300_update_ztop(r300);

    /* Nothing more to do unless the HyperZ atom is going to be re-emitted. */
    if (!r300->hyperz_state.dirty)
        return;

    r300_update_hyperz(r300);
}

View file

@ -224,6 +224,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
/* Emitted in flush. */
end_dwords += 26; /* emit_query_end */
end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */
cs_dwords += end_dwords;

View file

@ -688,7 +688,9 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
/* Now compute the fb_state atom size. */
r300->fb_state.size = 2 + (8 * state->nr_cbufs);
if (state->zsbuf)
if (r300->cbzb_clear)
r300->fb_state.size += 10;
else if (state->zsbuf)
r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14;
/* The size of the rest of atoms stays the same. */

View file

@ -1034,6 +1034,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
if (surface) {
uint32_t stride, offset, tile_height;
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
surface->base.format = texture->format;
@ -1054,6 +1056,34 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
surface->offset = r300_texture_get_offset(tex, level, zslice, face);
surface->pitch = tex->fb_state.pitch[level];
surface->format = tex->fb_state.format;
/* Parameters for the CBZB clear. */
surface->cbzb_width = align(surface->base.width, 64);
/* Height must be aligned to the size of a tile. */
tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
DIM_HEIGHT);
surface->cbzb_height = align((surface->base.height + 1) / 2,
tile_height);
/* Offset must be aligned to 2K and must point at the beginning
* of a scanline. */
stride = r300_texture_get_stride(r300_screen(screen), tex, level);
offset = surface->offset + stride * surface->cbzb_height;
surface->cbzb_midpoint_offset = offset & ~2047;
surface->cbzb_pitch = surface->pitch & 0x1ffffc;
if (util_format_get_blocksizebits(surface->base.format) == 32)
surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
else
surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;
SCREEN_DBG(r300_screen(screen), DBG_TEX,
"CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n",
surface->cbzb_width, surface->cbzb_height,
offset & 2047,
tex->mip_macrotile[level] ? "YES" : " NO");
}
return &surface->base;