mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 02:28:10 +02:00
r300g: implement fast color clear
An initial implementation made by Dave Airlie. For it to be used, a color-only clear must be invoked and exactly one point-sampled render target must be set. The render target must be macrotiled (for us to overcome alignment issues) and bpp must be either 16 or 32. I can't see a difference in performance. :( Conflicts: src/gallium/drivers/r300/r300_blit.c
This commit is contained in:
parent
78e8a8765f
commit
8c836f7f74
10 changed files with 178 additions and 13 deletions
|
|
@ -24,6 +24,7 @@
|
|||
#include "r300_texture.h"
|
||||
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_pack_color.h"
|
||||
|
||||
enum r300_blitter_op /* bitmask */
|
||||
{
|
||||
|
|
@ -79,6 +80,48 @@ static void r300_blitter_end(struct r300_context *r300)
|
|||
}
|
||||
}
|
||||
|
||||
static uint32_t r300_depth_clear_cb_value(enum pipe_format format,
|
||||
const float* rgba)
|
||||
{
|
||||
union util_color uc;
|
||||
util_pack_color(rgba, format, &uc);
|
||||
|
||||
if (util_format_get_blocksizebits(format) == 32)
|
||||
return uc.ui;
|
||||
else
|
||||
return uc.us | (uc.us << 16);
|
||||
}
|
||||
|
||||
static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
|
||||
unsigned clear_buffers)
|
||||
{
|
||||
struct pipe_framebuffer_state *fb =
|
||||
(struct pipe_framebuffer_state*)r300->fb_state.state;
|
||||
struct r300_surface *surf = r300_surface(fb->cbufs[0]);
|
||||
unsigned bpp;
|
||||
|
||||
/* Only color clear allowed, and only one colorbuffer. */
|
||||
if (clear_buffers != PIPE_CLEAR_COLOR || fb->nr_cbufs != 1)
|
||||
return FALSE;
|
||||
|
||||
/* The colorbuffer must be point-sampled. */
|
||||
if (surf->base.texture->nr_samples > 1)
|
||||
return FALSE;
|
||||
|
||||
bpp = util_format_get_blocksizebits(surf->base.format);
|
||||
|
||||
/* ZB can only work with the two pixel sizes. */
|
||||
if (bpp != 16 && bpp != 32)
|
||||
return FALSE;
|
||||
|
||||
/* If the midpoint ZB offset is not aligned to 2048, it returns garbage
|
||||
* with certain texture sizes. Macrotiling ensures the alignment. */
|
||||
if (!r300_texture(surf->base.texture)->mip_macrotile[surf->base.level])
|
||||
return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Clear currently bound buffers. */
|
||||
static void r300_clear(struct pipe_context* pipe,
|
||||
unsigned buffers,
|
||||
|
|
@ -124,16 +167,40 @@ static void r300_clear(struct pipe_context* pipe,
|
|||
struct r300_context* r300 = r300_context(pipe);
|
||||
struct pipe_framebuffer_state *fb =
|
||||
(struct pipe_framebuffer_state*)r300->fb_state.state;
|
||||
struct r300_hyperz_state *hyperz =
|
||||
(struct r300_hyperz_state*)r300->hyperz_state.state;
|
||||
uint32_t width = fb->width;
|
||||
uint32_t height = fb->height;
|
||||
|
||||
/* Enable CBZB clear. */
|
||||
if (r300_cbzb_clear_allowed(r300, buffers)) {
|
||||
struct r300_surface *surf = r300_surface(fb->cbufs[0]);
|
||||
|
||||
hyperz->zb_depthclearvalue =
|
||||
r300_depth_clear_cb_value(surf->base.format, rgba);
|
||||
|
||||
width = surf->cbzb_width;
|
||||
height = surf->cbzb_height;
|
||||
|
||||
r300->cbzb_clear = TRUE;
|
||||
r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
|
||||
}
|
||||
|
||||
/* Clear. */
|
||||
r300_blitter_begin(r300, R300_CLEAR);
|
||||
util_blitter_clear(r300->blitter,
|
||||
fb->width,
|
||||
fb->height,
|
||||
width,
|
||||
height,
|
||||
fb->nr_cbufs,
|
||||
buffers, rgba, depth, stencil);
|
||||
r300_blitter_end(r300);
|
||||
|
||||
/* Disable CBZB clear. */
|
||||
if (r300->cbzb_clear) {
|
||||
r300->cbzb_clear = FALSE;
|
||||
r300_mark_fb_state_dirty(r300, R300_CHANGED_CBZB_FLAG);
|
||||
}
|
||||
|
||||
/* XXX this flush "fixes" a hardlock in the cubestorm xscreensaver */
|
||||
if (r300->flush_counter == 0)
|
||||
pipe->flush(pipe, 0, NULL);
|
||||
|
|
|
|||
|
|
@ -330,7 +330,7 @@ static void r300_init_states(struct pipe_context *pipe)
|
|||
BEGIN_CB(&hyperz->cb_begin, r300->hyperz_state.size);
|
||||
OUT_CB_REG(R300_ZB_BW_CNTL, 0);
|
||||
OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0);
|
||||
OUT_CB_REG(R300_SC_HYPERZ, 0x1C);
|
||||
OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2);
|
||||
END_CB;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -311,6 +311,13 @@ struct r300_surface {
|
|||
uint32_t offset; /* COLOROFFSET or DEPTHOFFSET. */
|
||||
uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
|
||||
uint32_t format; /* US_OUT_FMT or ZB_FORMAT. */
|
||||
|
||||
/* Parameters dedicated to the CBZB clear. */
|
||||
uint32_t cbzb_width; /* Aligned width. */
|
||||
uint32_t cbzb_height; /* Half of the height. */
|
||||
uint32_t cbzb_midpoint_offset; /* DEPTHOFFSET. */
|
||||
uint32_t cbzb_pitch; /* DEPTHPITCH. */
|
||||
uint32_t cbzb_format; /* ZB_FORMAT. */
|
||||
};
|
||||
|
||||
struct r300_texture {
|
||||
|
|
@ -525,6 +532,7 @@ struct r300_context {
|
|||
/* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
|
||||
boolean incompatible_vb_layout;
|
||||
|
||||
boolean cbzb_clear;
|
||||
/* upload managers */
|
||||
struct u_upload_mgr *upload_vb;
|
||||
struct u_upload_mgr *upload_ib;
|
||||
|
|
@ -593,7 +601,8 @@ void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
|
|||
|
||||
/* r300_state.c */
|
||||
enum r300_fb_state_change {
|
||||
R300_CHANGED_FB_STATE = 0
|
||||
R300_CHANGED_FB_STATE = 0,
|
||||
R300_CHANGED_CBZB_FLAG
|
||||
};
|
||||
|
||||
void r300_mark_fb_state_dirty(struct r300_context *r300,
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
#include "r300_emit.h"
|
||||
#include "r300_fs.h"
|
||||
#include "r300_screen.h"
|
||||
#include "r300_texture.h"
|
||||
#include "r300_screen_buffer.h"
|
||||
#include "r300_vs.h"
|
||||
|
||||
|
|
@ -272,8 +273,17 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
|
|||
struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state;
|
||||
struct pipe_framebuffer_state* fb =
|
||||
(struct pipe_framebuffer_state*)r300->fb_state.state;
|
||||
uint32_t height = fb->height;
|
||||
uint32_t width = fb->width;
|
||||
CS_LOCALS(r300);
|
||||
|
||||
if (r300->cbzb_clear) {
|
||||
struct r300_surface *surf = r300_surface(fb->cbufs[0]);
|
||||
|
||||
height = surf->cbzb_height;
|
||||
width = surf->cbzb_width;
|
||||
}
|
||||
|
||||
BEGIN_CS(size);
|
||||
|
||||
/* Set up scissors.
|
||||
|
|
@ -281,13 +291,13 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state)
|
|||
OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
|
||||
if (r300->screen->caps.is_r500) {
|
||||
OUT_CS(0);
|
||||
OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) |
|
||||
((fb->height - 1) << R300_SCISSORS_Y_SHIFT));
|
||||
OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) |
|
||||
((height - 1) << R300_SCISSORS_Y_SHIFT));
|
||||
} else {
|
||||
OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
|
||||
(1440 << R300_SCISSORS_Y_SHIFT));
|
||||
OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) |
|
||||
((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT));
|
||||
OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) |
|
||||
((height + 1440-1) << R300_SCISSORS_Y_SHIFT));
|
||||
}
|
||||
|
||||
/* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */
|
||||
|
|
@ -344,8 +354,20 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
|
|||
OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
|
||||
}
|
||||
|
||||
/* Set up the ZB part of the CBZB clear. */
|
||||
if (r300->cbzb_clear) {
|
||||
surf = r300_surface(fb->cbufs[0]);
|
||||
|
||||
OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format);
|
||||
|
||||
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
|
||||
OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain, 0);
|
||||
|
||||
OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
|
||||
OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain, 0);
|
||||
}
|
||||
/* Set up a zbuffer. */
|
||||
if (fb->zsbuf) {
|
||||
else if (fb->zsbuf) {
|
||||
surf = r300_surface(fb->zsbuf);
|
||||
|
||||
OUT_CS_REG(R300_ZB_FORMAT, surf->format);
|
||||
|
|
@ -377,6 +399,18 @@ void r300_emit_hyperz_state(struct r300_context *r300,
|
|||
WRITE_CS_TABLE(state, size);
|
||||
}
|
||||
|
||||
void r300_emit_hyperz_end(struct r300_context *r300)
|
||||
{
|
||||
struct r300_hyperz_state z =
|
||||
*(struct r300_hyperz_state*)r300->hyperz_state.state;
|
||||
|
||||
z.zb_bw_cntl = 0;
|
||||
z.zb_depthclearvalue = 0;
|
||||
z.sc_hyperz = R300_SC_HYPERZ_ADJ_2;
|
||||
|
||||
r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z);
|
||||
}
|
||||
|
||||
void r300_emit_fb_state_pipelined(struct r300_context *r300,
|
||||
unsigned size, void *state)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -45,6 +45,11 @@ void r300_emit_clip_state(struct r300_context* r300,
|
|||
void r300_emit_dsa_state(struct r300_context* r300,
|
||||
unsigned size, void* state);
|
||||
|
||||
void r300_emit_hyperz_state(struct r300_context *r300,
|
||||
unsigned size, void *state);
|
||||
|
||||
void r300_emit_hyperz_end(struct r300_context *r300);
|
||||
|
||||
void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);
|
||||
|
||||
void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
|
||||
|
|
@ -64,9 +69,6 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
|
|||
|
||||
void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state);
|
||||
|
||||
void r300_emit_hyperz_state(struct r300_context *r300,
|
||||
unsigned size, void *state);
|
||||
|
||||
void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state);
|
||||
|
||||
void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ static void r300_flush(struct pipe_context* pipe,
|
|||
}
|
||||
|
||||
if (r300->dirty_hw) {
|
||||
r300_emit_hyperz_end(r300);
|
||||
r300_emit_query_end(r300);
|
||||
|
||||
r300->flush_counter++;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,22 @@
|
|||
#include "r300_reg.h"
|
||||
#include "r300_fs.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* The HyperZ setup */
|
||||
/*****************************************************************************/
|
||||
|
||||
static void r300_update_hyperz(struct r300_context* r300)
|
||||
{
|
||||
struct r300_hyperz_state *z =
|
||||
(struct r300_hyperz_state*)r300->hyperz_state.state;
|
||||
|
||||
z->zb_bw_cntl = 0;
|
||||
z->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
|
||||
|
||||
if (r300->cbzb_clear)
|
||||
z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY;
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
/* The ZTOP state */
|
||||
/*****************************************************************************/
|
||||
|
|
@ -118,4 +134,7 @@ static void r300_update_ztop(struct r300_context* r300)
|
|||
void r300_update_hyperz_state(struct r300_context* r300)
|
||||
{
|
||||
r300_update_ztop(r300);
|
||||
if (r300->hyperz_state.dirty) {
|
||||
r300_update_hyperz(r300);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -224,6 +224,7 @@ static void r300_prepare_for_rendering(struct r300_context *r300,
|
|||
|
||||
/* Emitted in flush. */
|
||||
end_dwords += 26; /* emit_query_end */
|
||||
end_dwords += r300->hyperz_state.size; /* emit_hyperz_end */
|
||||
|
||||
cs_dwords += end_dwords;
|
||||
|
||||
|
|
|
|||
|
|
@ -688,7 +688,9 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
|
|||
/* Now compute the fb_state atom size. */
|
||||
r300->fb_state.size = 2 + (8 * state->nr_cbufs);
|
||||
|
||||
if (state->zsbuf)
|
||||
if (r300->cbzb_clear)
|
||||
r300->fb_state.size += 10;
|
||||
else if (state->zsbuf)
|
||||
r300->fb_state.size += r300->screen->caps.has_hiz ? 18 : 14;
|
||||
|
||||
/* The size of the rest of atoms stays the same. */
|
||||
|
|
|
|||
|
|
@ -1034,6 +1034,8 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
|
|||
struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
|
||||
|
||||
if (surface) {
|
||||
uint32_t stride, offset, tile_height;
|
||||
|
||||
pipe_reference_init(&surface->base.reference, 1);
|
||||
pipe_resource_reference(&surface->base.texture, texture);
|
||||
surface->base.format = texture->format;
|
||||
|
|
@ -1054,6 +1056,34 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
|
|||
surface->offset = r300_texture_get_offset(tex, level, zslice, face);
|
||||
surface->pitch = tex->fb_state.pitch[level];
|
||||
surface->format = tex->fb_state.format;
|
||||
|
||||
/* Parameters for the CBZB clear. */
|
||||
surface->cbzb_width = align(surface->base.width, 64);
|
||||
|
||||
/* Height must be aligned to the size of a tile. */
|
||||
tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
|
||||
DIM_HEIGHT);
|
||||
surface->cbzb_height = align((surface->base.height + 1) / 2,
|
||||
tile_height);
|
||||
|
||||
/* Offset must be aligned to 2K and must point at the beginning
|
||||
* of a scanline. */
|
||||
stride = r300_texture_get_stride(r300_screen(screen), tex, level);
|
||||
offset = surface->offset + stride * surface->cbzb_height;
|
||||
surface->cbzb_midpoint_offset = offset & ~2047;
|
||||
|
||||
surface->cbzb_pitch = surface->pitch & 0x1ffffc;
|
||||
|
||||
if (util_format_get_blocksizebits(surface->base.format) == 32)
|
||||
surface->cbzb_format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
|
||||
else
|
||||
surface->cbzb_format = R300_DEPTHFORMAT_16BIT_INT_Z;
|
||||
|
||||
SCREEN_DBG(r300_screen(screen), DBG_TEX,
|
||||
"CBZB Dim: %ix%i, Misalignment: %i, Macro: %s\n",
|
||||
surface->cbzb_width, surface->cbzb_height,
|
||||
offset & 2047,
|
||||
tex->mip_macrotile[level] ? "YES" : " NO");
|
||||
}
|
||||
|
||||
return &surface->base;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue