mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 13:20:14 +01:00
v3dv: implement double-buffer mode
Double buffer mode splits the tile buffer size in half so we can start processing the next tile while the current one is being stored to memory. This mode is available only if MSAA is not enabled and can, in theory, improve performance by reducing tile store overhead; however, it comes at the cost of reducing the tile size, which also causes some overhead of its own. Testing shows that this helps some cases (e.g. the Vulkan Quake ports) but hurts others (e.g. Unreal Engine 4), so for the time being we don't enable this by default, but we allow enabling it selectively by using V3D_DEBUG. Reviewed-by: Juan A. Suarez <jasuarez@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14551>
This commit is contained in:
parent
821c66e50c
commit
b9f9474577
10 changed files with 61 additions and 17 deletions
|
|
@ -88,6 +88,8 @@ static const struct debug_named_value debug_control[] = {
|
|||
"Force 16-bit precision on all TMU operations" },
|
||||
{ "noloopunroll", V3D_DEBUG_NO_LOOP_UNROLL,
|
||||
"Disable loop unrolling" },
|
||||
{ "db", V3D_DEBUG_DOUBLE_BUFFER,
|
||||
"Enable double buffer for Tile Buffer when MSAA is disabled" },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ extern uint32_t V3D_DEBUG;
|
|||
#define V3D_DEBUG_TMU_16BIT (1 << 19)
|
||||
#define V3D_DEBUG_NO_LOOP_UNROLL (1 << 20)
|
||||
#define V3D_DEBUG_CL_NO_BIN (1 << 21)
|
||||
#define V3D_DEBUG_DOUBLE_BUFFER (1 << 22)
|
||||
|
||||
#define V3D_DEBUG_SHADERS (V3D_DEBUG_TGSI | V3D_DEBUG_NIR | \
|
||||
V3D_DEBUG_VIR | V3D_DEBUG_QPU | \
|
||||
|
|
|
|||
|
|
@ -88,8 +88,8 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
|
|||
}
|
||||
|
||||
void
|
||||
v3d_choose_tile_size(uint32_t color_attachment_count,
|
||||
uint32_t max_color_bpp, bool msaa,
|
||||
v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
|
||||
bool msaa, bool double_buffer,
|
||||
uint32_t *width, uint32_t *height)
|
||||
{
|
||||
static const uint8_t tile_sizes[] = {
|
||||
|
|
@ -108,8 +108,12 @@ v3d_choose_tile_size(uint32_t color_attachment_count,
|
|||
else if (color_attachment_count > 1)
|
||||
idx += 1;
|
||||
|
||||
/* MSAA and double-buffer are mutually exclusive */
|
||||
assert(!msaa || !double_buffer);
|
||||
if (msaa)
|
||||
idx += 2;
|
||||
else if (double_buffer)
|
||||
idx += 1;
|
||||
|
||||
idx += max_color_bpp;
|
||||
|
||||
|
|
|
|||
|
|
@ -35,8 +35,8 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
|
|||
uint32_t wg_size);
|
||||
|
||||
void
|
||||
v3d_choose_tile_size(uint32_t color_attachment_count,
|
||||
uint32_t max_color_bpp, bool msaa,
|
||||
v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
|
||||
bool msaa, bool double_buffer,
|
||||
uint32_t *width, uint32_t *height);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -441,8 +441,22 @@ job_compute_frame_tiling(struct v3dv_job *job,
|
|||
tiling->msaa = msaa;
|
||||
tiling->internal_bpp = max_internal_bpp;
|
||||
|
||||
v3d_choose_tile_size(render_target_count, max_internal_bpp, msaa,
|
||||
&tiling->tile_width, &tiling->tile_height);
|
||||
/* We can use double-buffer when MSAA is disabled to reduce tile store
|
||||
* overhead.
|
||||
*
|
||||
* FIXME: if we are emitting any tile loads the hardware will serialize
|
||||
* loads and stores across tiles effectively disabling double buffering,
|
||||
* so we would want to check for that and not enable it in that case to
|
||||
* avoid reducing the tile size.
|
||||
*/
|
||||
tiling->double_buffer =
|
||||
unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
|
||||
|
||||
assert(!tiling->msaa || !tiling->double_buffer);
|
||||
|
||||
v3d_choose_tile_size(render_target_count, max_internal_bpp,
|
||||
tiling->msaa, tiling->double_buffer,
|
||||
&tiling->tile_width, &tiling->tile_height);
|
||||
|
||||
tiling->draw_tiles_x = DIV_ROUND_UP(width, tiling->tile_width);
|
||||
tiling->draw_tiles_y = DIV_ROUND_UP(height, tiling->tile_height);
|
||||
|
|
|
|||
|
|
@ -310,7 +310,9 @@ subpass_get_granularity(struct v3dv_device *device,
|
|||
}
|
||||
|
||||
uint32_t width, height;
|
||||
v3d_choose_tile_size(color_attachment_count, max_bpp, msaa, &width, &height);
|
||||
bool double_buffer = (V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;
|
||||
v3d_choose_tile_size(color_attachment_count, max_bpp, msaa,
|
||||
double_buffer, &width, &height);
|
||||
*granularity = (VkExtent2D) {
|
||||
.width = width,
|
||||
.height = height
|
||||
|
|
|
|||
|
|
@ -745,6 +745,7 @@ struct v3dv_frame_tiling {
|
|||
uint32_t render_target_count;
|
||||
uint32_t internal_bpp;
|
||||
bool msaa;
|
||||
bool double_buffer;
|
||||
uint32_t tile_width;
|
||||
uint32_t tile_height;
|
||||
uint32_t draw_tiles_x;
|
||||
|
|
@ -765,6 +766,21 @@ bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
|
|||
struct v3dv_render_pass *pass,
|
||||
uint32_t subpass_idx);
|
||||
|
||||
/* Checks if we need to emit 2 initial tile clears for double buffer mode.
|
||||
* This happens when we render at least 2 tiles, because in this mode each
|
||||
* tile uses a different half of the tile buffer memory so we can have 2 tiles
|
||||
* in flight (one being stored to memory and the next being rendered). In this
|
||||
* scenario, if we emit a single initial tile clear we would only clear the
|
||||
* first half of the tile buffer.
*
* Returns true only when tiling->double_buffer is set and at least one of
* tiling->draw_tiles_x, tiling->draw_tiles_y or tiling->layers is greater
* than 1; a frame with more than one layer is therefore handled the same way
* as a frame with more than one tile. Returns false otherwise, including
* whenever double-buffer mode is disabled.
|
||||
*/
|
||||
static inline bool
|
||||
v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
|
||||
{
|
||||
return tiling->double_buffer &&
|
||||
(tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
|
||||
tiling->layers > 1);
|
||||
}
|
||||
|
||||
struct v3dv_cmd_pool {
|
||||
struct vk_object_base base;
|
||||
|
||||
|
|
|
|||
|
|
@ -53,11 +53,13 @@ v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
|
|||
config.number_of_layers = layers;
|
||||
}
|
||||
|
||||
assert(!tiling->double_buffer || !tiling->msaa);
|
||||
cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
|
||||
config.width_in_pixels = tiling->width;
|
||||
config.height_in_pixels = tiling->height;
|
||||
config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
|
||||
config.multisample_mode_4x = tiling->msaa;
|
||||
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
}
|
||||
|
||||
|
|
@ -762,11 +764,13 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
|||
*/
|
||||
bool do_early_zs_clear = false;
|
||||
const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
|
||||
assert(!tiling->msaa || !tiling->double_buffer);
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
|
||||
config.image_width_pixels = framebuffer->width;
|
||||
config.image_height_pixels = framebuffer->height;
|
||||
config.number_of_render_targets = MAX2(subpass->color_count, 1);
|
||||
config.multisample_mode_4x = tiling->msaa;
|
||||
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
|
||||
if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
|
||||
|
|
@ -944,12 +948,6 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
|||
tiling->frame_height_in_supertiles;
|
||||
}
|
||||
|
||||
/* Start by clearing the tile buffer. */
|
||||
cl_emit(rcl, TILE_COORDINATES, coords) {
|
||||
coords.tile_column_number = 0;
|
||||
coords.tile_row_number = 0;
|
||||
}
|
||||
|
||||
/* Emit an initial clear of the tile buffers. This is necessary
|
||||
* for any buffers that should be cleared (since clearing
|
||||
* normally happens at the *end* of the generic tile list), but
|
||||
|
|
@ -964,13 +962,13 @@ v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
|
|||
* changes on V3D 3.x, and 2 dummy stores on 4.x.
|
||||
*/
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (i > 0)
|
||||
cl_emit(rcl, TILE_COORDINATES, coords);
|
||||
cl_emit(rcl, TILE_COORDINATES, coords);
|
||||
cl_emit(rcl, END_OF_LOADS, end);
|
||||
cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
|
||||
store.buffer_to_store = NONE;
|
||||
}
|
||||
if (i == 0 && cmd_buffer->state.tile_aligned_render_area) {
|
||||
if (cmd_buffer->state.tile_aligned_render_area &&
|
||||
(i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
|
||||
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
|
||||
clear.clear_z_stencil_buffer = !job->early_zs_clear;
|
||||
clear.clear_all_render_targets = true;
|
||||
|
|
|
|||
|
|
@ -50,12 +50,14 @@ emit_rcl_prologue(struct v3dv_job *job,
|
|||
if (job->cmd_buffer->state.oom)
|
||||
return NULL;
|
||||
|
||||
assert(!tiling->msaa || !tiling->double_buffer);
|
||||
cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
|
||||
config.early_z_disable = true;
|
||||
config.image_width_pixels = tiling->width;
|
||||
config.image_height_pixels = tiling->height;
|
||||
config.number_of_render_targets = 1;
|
||||
config.multisample_mode_4x = tiling->msaa;
|
||||
config.double_buffer_in_non_ms_mode = tiling->double_buffer;
|
||||
config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
|
||||
config.internal_depth_type = fb->internal_depth_type;
|
||||
}
|
||||
|
|
@ -166,7 +168,11 @@ emit_frame_setup(struct v3dv_job *job,
|
|||
cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
|
||||
store.buffer_to_store = NONE;
|
||||
}
|
||||
if (clear_value && i == 0) {
|
||||
/* When using double-buffering, we need to clear both buffers (unless
|
||||
* we only have a single tile to render).
|
||||
*/
|
||||
if (clear_value &&
|
||||
(i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
|
||||
cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
|
||||
clear.clear_z_stencil_buffer = true;
|
||||
clear.clear_all_render_targets = true;
|
||||
|
|
|
|||
|
|
@ -266,6 +266,7 @@ v3d_get_tile_buffer_size(bool is_msaa,
|
|||
}
|
||||
|
||||
v3d_choose_tile_size(max_cbuf_idx + 1, *max_bpp, is_msaa,
|
||||
false /* double-buffer */,
|
||||
tile_width, tile_height);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue