mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-24 21:10:24 +01:00
freedreno/a6xx: pre-calculate expected vsc stream sizes
We should only rely on overflow detection for indirect draws, where we have no other option. This doesn't use quite the worst-possible-case sizes, which in practice seem to be ~20x larger than what is required, but instead uses roughly half of that.
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4750>
This commit is contained in:
parent
99d802ccc7
commit
f561e516c8
8 changed files with 229 additions and 1 deletions
|
|
@ -195,6 +195,8 @@ a6xx_SOURCES := \
|
|||
a6xx/fd6_screen.h \
|
||||
a6xx/fd6_texture.c \
|
||||
a6xx/fd6_texture.h \
|
||||
a6xx/fd6_vsc.c \
|
||||
a6xx/fd6_vsc.h \
|
||||
a6xx/fd6_zsa.c \
|
||||
a6xx/fd6_zsa.h
|
||||
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
#include "fd6_emit.h"
|
||||
#include "fd6_program.h"
|
||||
#include "fd6_format.h"
|
||||
#include "fd6_vsc.h"
|
||||
#include "fd6_zsa.h"
|
||||
|
||||
static void
|
||||
|
|
@ -192,6 +193,9 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
if (emit.key.gs)
|
||||
emit.key.key.has_gs = true;
|
||||
|
||||
if (!(emit.key.hs || emit.key.ds || emit.key.gs || info->indirect))
|
||||
fd6_vsc_update_sizes(ctx->batch, info);
|
||||
|
||||
fixup_shader_state(ctx, &emit.key.key);
|
||||
|
||||
if (!(ctx->dirty & FD_DIRTY_PROG)) {
|
||||
|
|
|
|||
|
|
@ -326,6 +326,27 @@ update_vsc_pipe(struct fd_batch *batch)
|
|||
struct fd_ringbuffer *ring = batch->gmem;
|
||||
int i;
|
||||
|
||||
if (batch->draw_strm_bits/8 > fd6_ctx->vsc_draw_strm_pitch) {
|
||||
if (fd6_ctx->vsc_draw_strm)
|
||||
fd_bo_del(fd6_ctx->vsc_draw_strm);
|
||||
fd6_ctx->vsc_draw_strm = NULL;
|
||||
/* Note: probably only need to align to 0x40, but aligning stronger
|
||||
* reduces the odds that we will have to realloc again on the next
|
||||
* frame:
|
||||
*/
|
||||
fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits/8, 0x4000);
|
||||
debug_printf("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x\n",
|
||||
fd6_ctx->vsc_draw_strm_pitch);
|
||||
}
|
||||
|
||||
if (batch->prim_strm_bits/8 > fd6_ctx->vsc_prim_strm_pitch) {
|
||||
if (fd6_ctx->vsc_prim_strm)
|
||||
fd_bo_del(fd6_ctx->vsc_prim_strm);
|
||||
fd6_ctx->vsc_prim_strm = NULL;
|
||||
fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits/8, 0x4000);
|
||||
debug_printf("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x\n",
|
||||
fd6_ctx->vsc_prim_strm_pitch);
|
||||
}
|
||||
|
||||
if (!fd6_ctx->vsc_draw_strm) {
|
||||
fd6_ctx->vsc_draw_strm = fd_bo_new(ctx->screen->dev,
|
||||
|
|
|
|||
160
src/gallium/drivers/freedreno/a6xx/fd6_vsc.c
Normal file
160
src/gallium/drivers/freedreno/a6xx/fd6_vsc.c
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
* Copyright © 2020 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_prim.h"
|
||||
|
||||
#include "freedreno_batch.h"
|
||||
#include "freedreno_gmem.h"
|
||||
|
||||
#include "fd6_vsc.h"
|
||||
|
||||
/*
|
||||
* Helper util to update expected vsc draw and primitive stream sizes, see
|
||||
* https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format
|
||||
*/
|
||||
|
||||
/* Widths, in bits, of the units used for the stream size arithmetic in
 * this file.  This is an anonymous enum with no declarator, so only the
 * constants are introduced and no file-scope object is defined alongside
 * them.
 */
enum {
	byte = 8,
	dword = 4 * byte,
};
|
||||
|
||||
/**
 * Number of bits the visibility stream needs to encode the number 'nr', see
 * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#numbers
 *
 * With n = util_last_bit(nr) the result is 2n - 1.  'nr' must be non-zero.
 */
static unsigned
number_size_bits(unsigned nr)
{
	const unsigned msb = util_last_bit(nr);

	/* encoding 0 is not possible */
	assert(msb);

	return 2 * msb - 1;
}
|
||||
|
||||
/**
 * Upper bound on the number of bits required to store a bitfield that is
 * 'n' bits wide, see
 * https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#bitfields
 */
static unsigned
bitfield_size_bits(unsigned n)
{
	/* worst case is always one bit more than the width of the bitfield */
	const unsigned worst_case = 1 + n;

	return worst_case;
}
|
||||
|
||||
static unsigned
|
||||
prim_count(const struct pipe_draw_info *info)
|
||||
{
|
||||
/* PIPE_PRIM_MAX used internally for RECTLIST blits on 3d pipe: */
|
||||
unsigned vtx_per_prim = (info->mode == PIPE_PRIM_MAX) ? 2 :
|
||||
u_vertices_per_prim(info->mode);
|
||||
return (info->count * info->instance_count) / vtx_per_prim;
|
||||
}
|
||||
|
||||
/**
|
||||
* The primitive stream uses a run-length encoding, where each packet contains a
|
||||
* bitfield of bins covered and then the number of primitives which have the same
|
||||
* bitfield. Each packet consists of the following, in order:
|
||||
*
|
||||
* - The (compressed) bitfield of bins covered
|
||||
* - The number of primitives with this bitset
|
||||
* - Checksum
|
||||
*
|
||||
* The worst case would be that each primitive has a different bitmask. In practice,
|
||||
* assuming ever other primitive has a different bitmask still gets us conservatively
|
||||
* large primitive stream sizes. (Ie. 10x what is needed, vs. 20x)
|
||||
*
|
||||
* https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#primitive-streams
|
||||
*/
|
||||
static unsigned
|
||||
primitive_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins)
|
||||
{
|
||||
unsigned num_prims = prim_count(info);
|
||||
unsigned nbits =
|
||||
(bitfield_size_bits(num_bins) /* bitfield of bins covered */
|
||||
+ number_size_bits(1) /* number of primitives with this bitset */
|
||||
+ 1 /* checksum */
|
||||
) * DIV_ROUND_UP(num_prims, 2);
|
||||
return align(nbits, dword);
|
||||
}
|
||||
|
||||
/**
|
||||
* Each draw stream packet contains the following:
|
||||
*
|
||||
* - Bin bitfield
|
||||
* - Last instance bit
|
||||
* - If bitfield is empty, the number of draws it is empty for, otherwise
|
||||
* the size of the corresponding primitive stream in DWORD's.
|
||||
* - Checksum
|
||||
*
|
||||
* https://github.com/freedreno/freedreno/wiki/Visibility-Stream-Format#draw-streams
|
||||
*/
|
||||
static unsigned
|
||||
draw_stream_size_bits(const struct pipe_draw_info *info, unsigned num_bins,
|
||||
unsigned prim_strm_bits)
|
||||
{
|
||||
unsigned ndwords = prim_strm_bits / dword;
|
||||
assert(info->instance_count > 0);
|
||||
return (bitfield_size_bits(num_bins) /* bitfield of bins */
|
||||
+ 1 /* last-instance-bit */
|
||||
+ number_size_bits(ndwords) /* size of corresponding prim strm */
|
||||
+ 1 /* checksum */
|
||||
) * info->instance_count;
|
||||
}
|
||||
|
||||
void
|
||||
fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info)
|
||||
{
|
||||
if (!batch->num_bins_per_pipe) {
|
||||
batch->num_bins_per_pipe = fd_gmem_estimate_bins_per_pipe(batch);
|
||||
|
||||
/* This is a convenient spot to add the size of the final draw-
|
||||
* stream packet:
|
||||
*
|
||||
* If there are N bins, the final packet, after all the draws are
|
||||
* done, consists of a 1 followed by N + 17 0's, plus a final 1.
|
||||
* This uses the otherwise-unused pattern of a non-empty bitfield
|
||||
* (initial 1) that is nontheless empty (has all 0's)
|
||||
*/
|
||||
unsigned final_pkt_sz = 1 + batch->num_bins_per_pipe + 17 + 1;
|
||||
batch->prim_strm_bits = align(final_pkt_sz, dword);
|
||||
}
|
||||
|
||||
unsigned prim_strm_bits =
|
||||
primitive_stream_size_bits(info, batch->num_bins_per_pipe);
|
||||
unsigned draw_strm_bits =
|
||||
draw_stream_size_bits(info, batch->num_bins_per_pipe, prim_strm_bits);
|
||||
|
||||
#if 0
|
||||
printf("vsc: prim_strm_bits=%d, draw_strm_bits=%d, nb=%u, ic=%u, c=%u, pc=%u (%s)\n",
|
||||
prim_strm_bits, draw_strm_bits, batch->num_bins_per_pipe,
|
||||
info->instance_count, info->count,
|
||||
(info->count * info->instance_count) /
|
||||
u_vertices_per_prim(info->mode),
|
||||
u_prim_name(info->mode));
|
||||
#endif
|
||||
|
||||
batch->prim_strm_bits += prim_strm_bits;
|
||||
batch->draw_strm_bits += draw_strm_bits;
|
||||
}
|
||||
|
||||
29
src/gallium/drivers/freedreno/a6xx/fd6_vsc.h
Normal file
29
src/gallium/drivers/freedreno/a6xx/fd6_vsc.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Copyright © 2020 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef FD6_VSC_H_
#define FD6_VSC_H_

/* Forward declarations, so that this header can be included on its own.
 * Without them the struct types named in the prototype below would be
 * declared with prototype scope only.
 */
struct fd_batch;
struct pipe_draw_info;

/**
 * Add the expected visibility stream (primitive stream and draw stream)
 * sizes of the draw described by 'info' to the running totals kept in
 * 'batch'.
 */
void fd6_vsc_update_sizes(struct fd_batch *batch, const struct pipe_draw_info *info);

#endif /* FD6_VSC_H_ */
|
||||
|
|
@ -84,6 +84,9 @@ batch_init(struct fd_batch *batch)
|
|||
batch->gmem_reason = 0;
|
||||
batch->num_draws = 0;
|
||||
batch->num_vertices = 0;
|
||||
batch->num_bins_per_pipe = 0;
|
||||
batch->prim_strm_bits = 0;
|
||||
batch->draw_strm_bits = 0;
|
||||
batch->stage = FD_STAGE_NULL;
|
||||
|
||||
fd_reset_wfi(batch);
|
||||
|
|
|
|||
|
|
@ -129,9 +129,16 @@ struct fd_batch {
|
|||
*/
|
||||
const struct fd_gmem_stateobj *gmem_state;
|
||||
|
||||
unsigned num_draws; /* number of draws in current batch */
|
||||
unsigned num_draws; /* number of draws in current batch */
|
||||
unsigned num_vertices; /* number of vertices in current batch */
|
||||
|
||||
/* Currently only used on a6xx, to calculate vsc prim/draw stream
|
||||
* sizes:
|
||||
*/
|
||||
unsigned num_bins_per_pipe;
|
||||
unsigned prim_strm_bits;
|
||||
unsigned draw_strm_bits;
|
||||
|
||||
/* Track the maximal bounds of the scissor of all the draws within a
|
||||
* batch. Used at the tile rendering step (fd_gmem_render_tiles(),
|
||||
* mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
|
||||
|
|
|
|||
|
|
@ -205,6 +205,8 @@ files_libfreedreno = files(
|
|||
'a6xx/fd6_screen.h',
|
||||
'a6xx/fd6_texture.c',
|
||||
'a6xx/fd6_texture.h',
|
||||
'a6xx/fd6_vsc.c',
|
||||
'a6xx/fd6_vsc.h',
|
||||
'a6xx/fd6_zsa.c',
|
||||
'a6xx/fd6_zsa.h',
|
||||
'ir3/ir3_cache.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue