freedreno/a6xx: pre-calculate expected vsc stream sizes

We should only rely on overflow detection for indirect draws, where we
have no other option.

This doesn't use quite the worst-possible-case sizes, which in practice
seem to be ~20x larger than what is required.  But instead uses roughly
half of that.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4750>
This commit is contained in:
Rob Clark 2020-04-25 12:16:35 -07:00 committed by Marge Bot
parent 99d802ccc7
commit f561e516c8
8 changed files with 229 additions and 1 deletions

View file

@ -195,6 +195,8 @@ a6xx_SOURCES := \
a6xx/fd6_screen.h \
a6xx/fd6_texture.c \
a6xx/fd6_texture.h \
a6xx/fd6_vsc.c \
a6xx/fd6_vsc.h \
a6xx/fd6_zsa.c \
a6xx/fd6_zsa.h

View file

@ -38,6 +38,7 @@
#include "fd6_emit.h"
#include "fd6_program.h"
#include "fd6_format.h"
#include "fd6_vsc.h"
#include "fd6_zsa.h"
static void
@ -192,6 +193,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
if (emit.key.gs)
emit.key.key.has_gs = true;
if (!(emit.key.hs || emit.key.ds || emit.key.gs || info->indirect))
fd6_vsc_update_sizes(ctx->batch, info);
fixup_shader_state(ctx, &emit.key.key);
if (!(ctx->dirty & FD_DIRTY_PROG)) {

View file

@ -326,6 +326,27 @@ update_vsc_pipe(struct fd_batch *batch)
struct fd_ringbuffer *ring = batch->gmem;
int i;
if (batch->draw_strm_bits/8 > fd6_ctx->vsc_draw_strm_pitch) {
if (fd6_ctx->vsc_draw_strm)
fd_bo_del(fd6_ctx->vsc_draw_strm);
fd6_ctx->vsc_draw_strm = NULL;
/* Note: probably only need to align to 0x40, but aligning stronger
* reduces the odds that we will have to realloc again on the next
* frame:
*/
fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits/8, 0x4000);
debug_printf("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x\n",
fd6_ctx->vsc_draw_strm_pitch);
}
if (batch->prim_strm_bits/8 > fd6_ctx->vsc_prim_strm_pitch) {
if (fd6_ctx->vsc_prim_strm)
fd_bo_del(fd6_ctx->vsc_prim_strm);
fd6_ctx->vsc_prim_strm = NULL;
fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits/8, 0x4000);
debug_printf("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x\n",
fd6_ctx->vsc_prim_strm_pitch);
}
if (!fd6_ctx->vsc_draw_strm) {
fd6_ctx->vsc_draw_strm = fd_bo_new(ctx->screen->dev,

View file

@ -0,0 +1,160 @@
/*
* Copyright © 2020 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "pipe/p_state.h"
#include "util/u_prim.h"
#include "freedreno_batch.h"
#include "freedreno_gmem.h"
#include "fd6_vsc.h"
/*
* Helper util to update expected vsc draw and primitive stream sizes, see
* https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format
*/
/* Unit sizes, expressed in bits, used when sizing the visibility streams.
 *
 * 'bits_per' is the enum tag: placing the identifier after the closing
 * brace would instead declare an (unused) file-scope variable.
 */
enum bits_per {
	byte = 8,
	dword = 4 * byte,
};
/**
 * Number of bits needed to store the value 'nr' in the visibility stream's
 * number encoding, see
 * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#numbers
 *
 * With 'width' being util_last_bit(nr), the result is 2 * width - 1.
 */
static unsigned
number_size_bits(unsigned nr)
{
	const unsigned width = util_last_bit(nr);

	/* the value zero cannot be encoded, so a zero width is a caller bug */
	assert(width);

	return (2 * width) - 1;
}
/**
 * Number of bits required to store a bitfield with 'n' entries, see
 * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#bitfields
 *
 * This is the worst-case size, which is always one bit more than the
 * number of entries.
 */
static unsigned
bitfield_size_bits(unsigned n)
{
	const unsigned worst_case_extra = 1;

	return worst_case_extra + n;
}
/**
 * Estimate the number of primitives generated by a draw: the vertex count
 * multiplied by the instance count, divided by the number of vertices per
 * primitive for the draw's mode.
 *
 * The result is never zero, so that the stream sizes derived from it are
 * never zero either.
 */
static unsigned
prim_count(const struct pipe_draw_info *info)
{
	/* PIPE_PRIM_MAX used internally for RECTLIST blits on 3d pipe: */
	unsigned vtx_per_prim = (info->mode == PIPE_PRIM_MAX) ? 2 :
			u_vertices_per_prim(info->mode);
	unsigned nprims = (info->count * info->instance_count) / vtx_per_prim;

	/* A draw with fewer vertices than a single primitive needs would
	 * otherwise produce a zero-sized primitive stream, and
	 * draw_stream_size_bits() would then hand a dword count of zero to
	 * number_size_bits(), which cannot encode zero.  Account for at
	 * least one primitive instead:
	 */
	return nprims ? nprims : 1;
}
/**
 * Estimate the size, in bits, of the primitive stream for a draw.
 *
 * The primitive stream is run-length encoded: every packet holds a bitfield
 * of the bins covered, followed by the number of primitives sharing that
 * bitfield.  In order, a packet consists of:
 *
 *  - The (compressed) bitfield of bins covered
 *  - The number of primitives with this bitset
 *  - Checksum
 *
 * The true worst case is every primitive having a different bitmask.  In
 * practice, assuming that every other primitive has a different bitmask
 * still gives conservatively large primitive stream sizes.  (Ie. 10x what
 * is needed, vs. 20x)
 *
 * The result is rounded up to a whole number of dwords.
 *
 * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#primitive-streams
 */
static unsigned
primitive_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins)
{
	/* size of a single packet describing a run of one primitive: */
	const unsigned pkt_bits =
		bitfield_size_bits(num_bins)   /* bitfield of bins covered */
		+ number_size_bits(1)          /* number of primitives with this bitset */
		+ 1;                           /* checksum */

	/* one packet for every second primitive: */
	const unsigned num_pkts = DIV_ROUND_UP(prim_count(info), 2);

	return align(pkt_bits * num_pkts, dword);
}
/**
* Each draw stream packet contains the following:
*
* - Bin bitfield
* - Last instance bit
* - If bitfield is empty, the number of draws it is empty for, otherwise
* the size of the corresponding primitive stream in DWORD's.
* - Checksum
*
* https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#draw-streams
*/
static unsigned
draw_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins,
unsigned prim_strm_bits)
{
unsigned ndwords = prim_strm_bits / dword;
assert(info->instance_count > 0);
return (bitfield_size_bits(num_bins) /* bitfield of bins */
+ 1 /* last-instance-bit */
+ number_size_bits(ndwords) /* size of corresponding prim strm */
+ 1 /* checksum */
) * info->instance_count;
}
/**
 * Add the estimated visibility stream sizes for a draw to the running
 * totals kept in the batch (batch->prim_strm_bits / batch->draw_strm_bits).
 *
 * On the first call for a batch (ie. while batch->num_bins_per_pipe is
 * still zero) the number of bins per pipe is estimated and the size of the
 * final stream packet is accounted for.
 */
void
fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info)
{
	if (!batch->num_bins_per_pipe) {
		batch->num_bins_per_pipe = fd_gmem_estimate_bins_per_pipe(batch);

		/* This is a convenient spot to add the size of the final draw-
		 * stream packet:
		 *
		 * If there are N bins, the final packet, after all the draws are
		 * done, consists of a 1 followed by N + 17 0's, plus a final 1.
		 * This uses the otherwise-unused pattern of a non-empty bitfield
		 * (initial 1) that is nonetheless empty (has all 0's)
		 *
		 * NOTE(review): the packet described above belongs to the draw
		 * stream, yet its size is accounted to prim_strm_bits below.
		 * Confirm whether draw_strm_bits was intended.
		 */
		unsigned final_pkt_sz = 1 + batch->num_bins_per_pipe + 17 + 1;
		batch->prim_strm_bits = align(final_pkt_sz, dword);
	}

	unsigned prim_strm_bits =
		primitive_stream_size_bits(info, batch->num_bins_per_pipe);
	unsigned draw_strm_bits =
		draw_stream_size_bits(info, batch->num_bins_per_pipe, prim_strm_bits);

	/* Disabled debug trace.  All values are unsigned (hence %u), and the
	 * primitive count comes from prim_count() so that it matches what the
	 * size estimates above actually used (including PIPE_PRIM_MAX draws):
	 */
#if 0
	printf("vsc: prim_strm_bits=%u, draw_strm_bits=%u, nb=%u, ic=%u, c=%u, pc=%u (%s)\n",
			prim_strm_bits, draw_strm_bits, batch->num_bins_per_pipe,
			info->instance_count, info->count,
			prim_count(info),
			u_prim_name(info->mode));
#endif

	batch->prim_strm_bits += prim_strm_bits;
	batch->draw_strm_bits += draw_strm_bits;
}

View file

@ -0,0 +1,29 @@
/*
* Copyright © 2020 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef FD6_VSC_H_
#define FD6_VSC_H_

/* Forward declarations, so that this header can be included without
 * depending on which other headers the includer pulled in first:
 */
struct fd_batch;
struct pipe_draw_info;

/**
 * Add the expected vsc primitive/draw stream sizes for the draw described
 * by 'info' to the totals tracked in 'batch'.
 */
void fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info);

#endif /* FD6_VSC_H_ */

View file

@ -84,6 +84,9 @@ batch_init(struct fd_batch *batch)
batch->gmem_reason = 0;
batch->num_draws = 0;
batch->num_vertices = 0;
batch->num_bins_per_pipe = 0;
batch->prim_strm_bits = 0;
batch->draw_strm_bits = 0;
batch->stage = FD_STAGE_NULL;
fd_reset_wfi(batch);

View file

@ -129,9 +129,16 @@ struct fd_batch {
*/
const struct fd_gmem_stateobj *gmem_state;
unsigned num_draws; /* number of draws in current batch */
unsigned num_draws; /* number of draws in current batch */
unsigned num_vertices; /* number of vertices in current batch */
/* Currently only used on a6xx, to calculate vsc prim/draw stream
* sizes:
*/
unsigned num_bins_per_pipe;
unsigned prim_strm_bits;
unsigned draw_strm_bits;
/* Track the maximal bounds of the scissor of all the draws within a
* batch. Used at the tile rendering step (fd_gmem_render_tiles(),
* mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.

View file

@ -205,6 +205,8 @@ files_libfreedreno = files(
'a6xx/fd6_screen.h',
'a6xx/fd6_texture.c',
'a6xx/fd6_texture.h',
'a6xx/fd6_vsc.c',
'a6xx/fd6_vsc.h',
'a6xx/fd6_zsa.c',
'a6xx/fd6_zsa.h',
'ir3/ir3_cache.c',