r300g: dynamically ask for and release Hyper-Z access

We ask for Hyper-Z access when clearing a zbuffer.
We release it if no zbuffer clear has been done for 2 seconds.
This commit is contained in:
Marek Olšák 2011-05-07 19:55:45 +02:00
parent 904f43f190
commit fdd37af3f7
10 changed files with 223 additions and 93 deletions

View file

@ -206,23 +206,47 @@ static void r300_clear(struct pipe_context* pipe,
(struct r300_hyperz_state*)r300->hyperz_state.state;
uint32_t width = fb->width;
uint32_t height = fb->height;
boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ);
uint32_t hyperz_dcv = hyperz->zb_depthclearvalue;
/* Enable fast Z clear.
* The zbuffer must be in micro-tiled mode, otherwise it locks up. */
if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && can_hyperz) {
if (r300_fast_zclear_allowed(r300)) {
hyperz_dcv = hyperz->zb_depthclearvalue =
r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
boolean zmask_clear, hiz_clear;
r300_mark_atom_dirty(r300, &r300->zmask_clear);
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
zmask_clear = r300_fast_zclear_allowed(r300);
hiz_clear = r300_hiz_clear_allowed(r300);
if (r300_hiz_clear_allowed(r300)) {
r300->hiz_clear_value = r300_hiz_clear_value(depth);
r300_mark_atom_dirty(r300, &r300->hiz_clear);
/* If we need Hyper-Z. */
if (zmask_clear || hiz_clear) {
r300->num_z_clears++;
/* Try to obtain the access to Hyper-Z buffers if we don't have one. */
if (!r300->hyperz_enabled) {
r300->hyperz_enabled =
r300->rws->cs_request_feature(r300->cs,
RADEON_FID_HYPERZ_RAM_ACCESS,
TRUE);
if (r300->hyperz_enabled) {
/* Need to emit HyperZ buffer regs for the first time. */
r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
}
/* Setup Hyper-Z clears. */
if (r300->hyperz_enabled) {
if (zmask_clear) {
hyperz_dcv = hyperz->zb_depthclearvalue =
r300_depth_clear_value(fb->zsbuf->format, depth, stencil);
r300_mark_atom_dirty(r300, &r300->zmask_clear);
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
if (hiz_clear) {
r300->hiz_clear_value = r300_hiz_clear_value(depth);
r300_mark_atom_dirty(r300, &r300->hiz_clear);
}
}
}
}

View file

@ -26,6 +26,7 @@
#include "util/u_sampler.h"
#include "util/u_simple_list.h"
#include "util/u_upload_mgr.h"
#include "os/os_time.h"
#include "r300_cb.h"
#include "r300_context.h"
@ -95,6 +96,10 @@ static void r300_destroy_context(struct pipe_context* context)
{
struct r300_context* r300 = r300_context(context);
if (r300->cs && r300->hyperz_enabled) {
r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS, FALSE);
}
if (r300->blitter)
util_blitter_destroy(r300->blitter);
if (r300->draw)
@ -167,8 +172,6 @@ static boolean r300_setup_atoms(struct r300_context* r300)
boolean is_r500 = r300->screen->caps.is_r500;
boolean has_tcl = r300->screen->caps.has_tcl;
boolean drm_2_6_0 = r300->rws->get_value(r300->rws, RADEON_VID_DRM_2_6_0);
boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ);
boolean has_hiz_ram = r300->screen->caps.hiz_ram > 0;
/* Create the actual atom list.
*
@ -219,13 +222,10 @@ static boolean r300_setup_atoms(struct r300_context* r300)
/* TX. */
R300_INIT_ATOM(texture_cache_inval, 2);
R300_INIT_ATOM(textures_state, 0);
if (can_hyperz) {
/* HiZ Clear */
if (has_hiz_ram)
R300_INIT_ATOM(hiz_clear, 4);
/* zmask clear */
R300_INIT_ATOM(zmask_clear, 4);
}
/* HiZ Clear */
R300_INIT_ATOM(hiz_clear, r300->screen->caps.hiz_ram > 0 ? 4 : 0);
/* zmask clear */
R300_INIT_ATOM(zmask_clear, r300->screen->caps.zmask_ram > 0 ? 4 : 0);
/* ZB (unpipelined), SU. */
R300_INIT_ATOM(query_start, 4);
@ -503,6 +503,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
&dsa);
}
r300->hyperz_time_of_last_flush = os_time_get();
/* Print driver info. */
#ifdef DEBUG
{
@ -512,7 +514,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
fprintf(stderr,
"r300: DRM version: %d.%d.%d, Name: %s, ID: 0x%04x, GB: %d, Z: %d\n"
"r300: GART size: %d MB, VRAM size: %d MB\n"
"r300: AA compression: %s, Z compression: %s, HiZ: %s\n",
"r300: AA compression RAM: %s, Z compression RAM: %s, HiZ RAM: %s\n",
rws->get_value(rws, RADEON_VID_DRM_MAJOR),
rws->get_value(rws, RADEON_VID_DRM_MINOR),
rws->get_value(rws, RADEON_VID_DRM_PATCHLEVEL),
@ -522,10 +524,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
rws->get_value(rws, RADEON_VID_R300_Z_PIPES),
rws->get_value(rws, RADEON_VID_GART_SIZE) >> 20,
rws->get_value(rws, RADEON_VID_VRAM_SIZE) >> 20,
rws->get_value(rws, RADEON_VID_CAN_AACOMPRESS) ? "YES" : "NO",
rws->get_value(rws, RADEON_VID_CAN_HYPERZ) &&
"YES", /* XXX really? */
r300->screen->caps.zmask_ram ? "YES" : "NO",
rws->get_value(rws, RADEON_VID_CAN_HYPERZ) &&
r300->screen->caps.hiz_ram ? "YES" : "NO");
}

View file

@ -592,18 +592,6 @@ struct r300_context {
boolean frag_clamp;
/* Whether fast color clear is enabled. */
boolean cbzb_clear;
/* Whether ZMASK is enabled. */
boolean zmask_in_use;
/* Whether ZMASK is being decompressed. */
boolean zmask_decompress;
/* Whether ZMASK/HIZ is locked, i.e. should be disabled and cannot be taken over. */
struct pipe_surface *locked_zbuffer;
/* Whether HIZ is enabled. */
boolean hiz_in_use;
/* HiZ function. Can be either MIN or MAX. */
enum r300_hiz_func hiz_func;
/* HiZ clear value. */
uint32_t hiz_clear_value;
/* Whether fragment shader needs to be validated. */
enum r300_fs_validity_status fs_status;
/* Framebuffer multi-write. */
@ -627,6 +615,21 @@ struct r300_context {
int vertex_arrays_offset;
int vertex_arrays_instance_id;
boolean instancing_enabled;
/* Hyper-Z stats. */
boolean hyperz_enabled; /* Whether it owns Hyper-Z access. */
int64_t hyperz_time_of_last_flush; /* Time of the last flush with Z clear. */
unsigned num_z_clears; /* Since the last flush. */
/* ZMask state. */
boolean zmask_in_use; /* Whether ZMASK is enabled. */
boolean zmask_decompress; /* Whether ZMASK is being decompressed. */
struct pipe_surface *locked_zbuffer; /* Unbound zbuffer which still has data in ZMASK. */
/* HiZ state. */
boolean hiz_in_use; /* Whether HIZ is enabled. */
enum r300_hiz_func hiz_func; /* HiZ function. Can be either MIN or MAX. */
uint32_t hiz_clear_value; /* HiZ clear value. */
};
#define foreach_atom(r300, atom) \

View file

@ -375,7 +375,6 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state;
struct r300_surface* surf;
unsigned i;
boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ);
uint32_t rb3d_cctl = 0;
CS_LOCALS(r300);
@ -432,7 +431,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch);
OUT_CS_RELOC(surf);
if (can_hyperz) {
if (r300->hyperz_enabled) {
/* HiZ RAM. */
OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0);
OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz);

View file

@ -27,17 +27,46 @@
#include "util/u_simple_list.h"
#include "util/u_upload_mgr.h"
#include "os/os_time.h"
#include "r300_context.h"
#include "r300_cs.h"
#include "r300_emit.h"
/* Submit the current command stream and reset the driver-side dirty tracking.
 *
 * \param r300   Context whose command stream (r300->cs) is flushed.
 * \param flags  Passed through unchanged to the winsys cs_flush() hook.
 *
 * The statement order matters: the "end" packets are emitted into the
 * command stream before it is flushed, and the state re-marking happens
 * after the flush. */
static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags)
{
struct r300_atom *atom;
/* Emit the Hyper-Z and query end sequences before submitting. */
r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
/* R500 only: emit an index bias of 0 before the flush. */
if (r300->screen->caps.is_r500)
r500_emit_index_bias(r300, 0);
r300->flush_counter++;
r300->rws->cs_flush(r300->cs, flags);
r300->dirty_hw = 0;
/* New kitchen sink, baby.
 * After the flush, mark every atom that has state (or is allowed to have
 * none) dirty again. */
foreach_atom(r300, atom) {
if (atom->state || atom->allow_null_state) {
r300_mark_atom_dirty(r300, atom);
}
}
r300->vertex_arrays_dirty = TRUE;
/* Unmark HWTCL state for SWTCL: without hardware TCL the vertex shader
 * atoms dirtied by the loop above are cleared again. */
if (!r300->screen->caps.has_tcl) {
r300->vs_state.dirty = FALSE;
r300->vs_constants.dirty = FALSE;
}
}
void r300_flush(struct pipe_context *pipe,
unsigned flags,
struct pipe_fence_handle **fence)
{
struct r300_context *r300 = r300_context(pipe);
struct r300_atom *atom;
struct pb_buffer **rfence = (struct pb_buffer**)fence;
if (r300->draw && !r300->draw_vbo_locked)
@ -56,32 +85,11 @@ void r300_flush(struct pipe_context *pipe,
}
if (r300->dirty_hw) {
r300_emit_hyperz_end(r300);
r300_emit_query_end(r300);
if (r300->screen->caps.is_r500)
r500_emit_index_bias(r300, 0);
r300->flush_counter++;
r300->rws->cs_flush(r300->cs, flags);
r300->dirty_hw = 0;
/* New kitchen sink, baby. */
foreach_atom(r300, atom) {
if (atom->state || atom->allow_null_state) {
r300_mark_atom_dirty(r300, atom);
}
}
r300->vertex_arrays_dirty = TRUE;
/* Unmark HWTCL state for SWTCL. */
if (!r300->screen->caps.has_tcl) {
r300->vs_state.dirty = FALSE;
r300->vs_constants.dirty = FALSE;
}
r300_flush_and_cleanup(r300, flags);
} else {
if (rfence) {
/* We have to create a fence object, but the command stream is empty
* and we cannot emit an empty CS. We must write some regs then. */
* and we cannot emit an empty CS. Let's write to some reg. */
CS_LOCALS(r300);
OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
r300->rws->cs_flush(r300->cs, flags);
@ -91,6 +99,32 @@ void r300_flush(struct pipe_context *pipe,
r300->rws->cs_flush(r300->cs, flags);
}
}
/* Update Hyper-Z status.
 *
 * A flush that contained at least one Z clear refreshes the timestamp.
 * Otherwise, if this context currently owns Hyper-Z access and the last
 * flush with a Z clear is more than 2 seconds old, the access is given up.
 *
 * The elapsed time must be computed as "now - last"; the previous
 * "!last > 2000000" compared 0 or 1 against 2000000 and was never true,
 * so Hyper-Z was never released. */
if (r300->num_z_clears) {
    r300->hyperz_time_of_last_flush = os_time_get();
} else if (r300->hyperz_enabled &&
           os_time_get() - r300->hyperz_time_of_last_flush > 2000000) {
    /* 2 seconds without a Z clear pretty much means a dead context
     * for HyperZ. */
    r300->hiz_in_use = FALSE;

    /* Decompress Z buffer. This still needs Hyper-Z access, so it has to
     * happen (and be flushed) before the access is released below. */
    if (r300->zmask_in_use) {
        if (r300->locked_zbuffer) {
            r300_decompress_zmask_locked(r300);
        } else {
            r300_decompress_zmask(r300);
        }

        r300_flush_and_cleanup(r300, flags);
    }

    /* Release HyperZ. */
    r300->rws->cs_request_feature(r300->cs, RADEON_FID_HYPERZ_RAM_ACCESS,
                                  FALSE);

    /* Remember that we no longer own the access. Without this the context
     * would keep emitting Hyper-Z registers and would never ask for the
     * access again on the next Z clear. */
    r300->hyperz_enabled = FALSE;

    /* The framebuffer atom size depends on hyperz_enabled, so it has to be
     * recomputed, mirroring what is done when the access is acquired. */
    r300_mark_fb_state_dirty(r300, R300_CHANGED_HYPERZ_FLAG);
}
r300->num_z_clears = 0;
}
static void r300_flush_wrapped(struct pipe_context *pipe,

View file

@ -151,8 +151,7 @@ static void r300_update_hyperz(struct r300_context* r300)
return;
}
if (!zstex ||
!r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ))
if (!zstex || !r300->hyperz_enabled)
return;
/* Zbuffer compression. */

View file

@ -768,7 +768,6 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
enum r300_fb_state_change change)
{
struct pipe_framebuffer_state *state = r300->fb_state.state;
boolean can_hyperz = r300->rws->get_value(r300->rws, RADEON_VID_CAN_HYPERZ);
r300_mark_atom_dirty(r300, &r300->gpu_flush);
r300_mark_atom_dirty(r300, &r300->fb_state);
@ -797,7 +796,7 @@ void r300_mark_fb_state_dirty(struct r300_context *r300,
r300->fb_state.size += 10;
else if (state->zsbuf) {
r300->fb_state.size += 10;
if (can_hyperz)
if (r300->hyperz_enabled)
r300->fb_state.size += 8;
}

View file

@ -48,22 +48,59 @@
#define RADEON_INFO_WANT_CMASK 8
#endif
/* Enable/disable feature access. Return TRUE on success. */
static boolean radeon_set_fd_access(int fd, unsigned request, boolean enable)
/* Enable/disable feature access for one command stream.
* If enable == TRUE, return TRUE on success.
* Otherwise, return FALSE.
*
* We basically do the same thing kernel does, because we have to deal
* with multiple contexts (here command streams) backed by one winsys. */
static boolean radeon_set_fd_access(struct radeon_drm_cs *applier,
struct radeon_drm_cs **owner,
pipe_mutex *mutex,
unsigned request, boolean enable)
{
struct drm_radeon_info info = {0};
unsigned value = enable ? 1 : 0;
pipe_mutex_lock(*mutex);
/* Early exit if we are sure the request will fail. */
if (enable) {
if (*owner) {
pipe_mutex_unlock(*mutex);
return FALSE;
}
} else {
if (*owner != applier) {
pipe_mutex_unlock(*mutex);
return FALSE;
}
}
/* Pass through the request to the kernel. */
info.value = (unsigned long)&value;
info.request = request;
if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0)
if (drmCommandWriteRead(applier->ws->fd, DRM_RADEON_INFO,
&info, sizeof(info)) != 0) {
pipe_mutex_unlock(*mutex);
return FALSE;
}
if (enable && !value)
return FALSE;
/* Update the rights in the winsys. */
if (enable) {
if (value) {
*owner = applier;
fprintf(stderr, "radeon: Acquired Hyper-Z.\n");
pipe_mutex_unlock(*mutex);
return TRUE;
}
} else {
*owner = NULL;
fprintf(stderr, "radeon: Released Hyper-Z.\n");
}
return TRUE;
pipe_mutex_unlock(*mutex);
return FALSE;
}
/* Helper function to do the ioctls needed for setup and init. */
@ -138,16 +175,6 @@ static void do_ioctls(struct radeon_drm_winsys *winsys)
}
winsys->z_pipes = target;
if (debug_get_bool_option("RADEON_HYPERZ", FALSE)) {
winsys->hyperz = radeon_set_fd_access(winsys->fd,
RADEON_INFO_WANT_HYPERZ, TRUE);
}
if (debug_get_bool_option("RADEON_CMASK", FALSE)) {
winsys->aacompress = radeon_set_fd_access(winsys->fd,
RADEON_INFO_WANT_CMASK, TRUE);
}
retval = drmCommandWriteRead(winsys->fd, DRM_RADEON_GEM_INFO,
&gem_info, sizeof(gem_info));
if (retval) {
@ -167,6 +194,9 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
{
struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
pipe_mutex_destroy(ws->hyperz_owner_mutex);
pipe_mutex_destroy(ws->cmask_owner_mutex);
ws->cman->destroy(ws->cman);
ws->kman->destroy(ws->kman);
FREE(rws);
@ -198,14 +228,38 @@ static uint32_t radeon_get_value(struct radeon_winsys *rws,
return ws->drm_major*100 + ws->drm_minor >= 206;
case RADEON_VID_DRM_2_8_0:
return ws->drm_major*100 + ws->drm_minor >= 208;
case RADEON_VID_CAN_HYPERZ:
return ws->hyperz;
case RADEON_VID_CAN_AACOMPRESS:
return ws->aacompress;
}
return 0;
}
/* Winsys hook: request (enable == TRUE) or release (enable == FALSE) access
 * to an optional hardware feature on behalf of one command stream.
 *
 * Each feature is only available when its debug option is set
 * (RADEON_HYPERZ / RADEON_CMASK, both defaulting to FALSE); otherwise the
 * request is refused without touching the winsys or the kernel.
 *
 * Returns whatever radeon_set_fd_access() returns for a known, opted-in
 * feature, and FALSE in every other case. */
static boolean radeon_cs_request_feature(struct radeon_winsys_cs *rcs,
                                         enum radeon_feature_id fid,
                                         boolean enable)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    if (fid == RADEON_FID_HYPERZ_RAM_ACCESS) {
        /* Hyper-Z is opt-in. */
        if (!debug_get_bool_option("RADEON_HYPERZ", FALSE))
            return FALSE;

        return radeon_set_fd_access(cs, &cs->ws->hyperz_owner,
                                    &cs->ws->hyperz_owner_mutex,
                                    RADEON_INFO_WANT_HYPERZ, enable);
    }

    if (fid == RADEON_FID_CMASK_RAM_ACCESS) {
        /* CMask is opt-in as well. */
        if (!debug_get_bool_option("RADEON_CMASK", FALSE))
            return FALSE;

        return radeon_set_fd_access(cs, &cs->ws->cmask_owner,
                                    &cs->ws->cmask_owner_mutex,
                                    RADEON_INFO_WANT_CMASK, enable);
    }

    /* Unknown feature id. */
    return FALSE;
}
struct radeon_winsys *radeon_drm_winsys_create(int fd)
{
struct radeon_drm_winsys *ws = CALLOC_STRUCT(radeon_drm_winsys);
@ -231,10 +285,14 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd)
/* Set functions. */
ws->base.destroy = radeon_winsys_destroy;
ws->base.get_value = radeon_get_value;
ws->base.cs_request_feature = radeon_cs_request_feature;
radeon_bomgr_init_functions(ws);
radeon_drm_cs_init_functions(ws);
pipe_mutex_init(ws->hyperz_owner_mutex);
pipe_mutex_init(ws->cmask_owner_mutex);
return &ws->base;
fail:

View file

@ -32,6 +32,8 @@
#include "radeon_winsys.h"
#include "os/os_thread.h"
struct radeon_drm_winsys {
struct radeon_winsys base;
@ -52,10 +54,10 @@ struct radeon_drm_winsys {
unsigned drm_minor;
unsigned drm_patchlevel;
/* Hyper-Z user */
boolean hyperz;
/* AA compression (CMask) */
boolean aacompress;
struct radeon_drm_cs *hyperz_owner;
pipe_mutex hyperz_owner_mutex;
struct radeon_drm_cs *cmask_owner;
pipe_mutex cmask_owner_mutex;
};
static INLINE struct radeon_drm_winsys *

View file

@ -87,9 +87,11 @@ enum radeon_value_id {
* - TBD
*/
RADEON_VID_DRM_2_8_0,
};
RADEON_VID_CAN_HYPERZ, /* ZMask + HiZ */
RADEON_VID_CAN_AACOMPRESS, /* CMask */
/* Optional features a command stream can request access to through
 * radeon_winsys::cs_request_feature. */
enum radeon_feature_id {
    /* Access to the ZMask + HiZ RAMs. */
    RADEON_FID_HYPERZ_RAM_ACCESS = 0,
    /* Access to the CMask RAM (AA compression). */
    RADEON_FID_CMASK_RAM_ACCESS = 1,
};
struct radeon_winsys {
@ -314,6 +316,16 @@ struct radeon_winsys {
*/
boolean (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf);
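/**
* Request or release access to a feature for a command stream.
*
* \param cs A command stream.
* \param fid The feature to request or release access to.
* \param enable TRUE to request access, FALSE to release it.
* \return TRUE if access was requested and granted, FALSE otherwise.
*/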
boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
enum radeon_feature_id fid,
boolean enable);
};
#endif