mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 13:40:11 +01:00
freedreno/a6xx: Allocate a fixed-size tess factor BO.
Saves per-batch allocations, avoids reallocation for various vertex counts, and removes the need for the indirect tess addrs constobj, which existed only so that we could emit the relocs to the tess BO after we'd emitted all the draws. Also, apparently it fixes one of our CTS failures.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13851>
This commit is contained in:
parent
577a0a7352
commit
d7226e9a9e
10 changed files with 48 additions and 83 deletions
|
|
@ -17,10 +17,6 @@ KHR-GLES31.core.geometry_shader.layered_framebuffer.depth_support,Fail
|
|||
|
||||
KHR-GLES31.core.geometry_shader.layered_framebuffer.stencil_support,Fail
|
||||
|
||||
# " Pixel data comparison failed; expected: (0.1, 0.2, 0.3, 0.4) rendered: (0, 0, 0, 0) epsilon: 0.00392157
|
||||
# Pixel data comparison failed at esextcTessellationShaderPoints.cpp:597"
|
||||
KHR-GLES31.core.tessellation_shader.tessellation_shader_point_mode.point_rendering,Fail
|
||||
|
||||
# "Invalid value returned: expected:[1, 1, 1, 1] retrieved: [0, 0, 0, 0
|
||||
# Invalid rendering result at esextcTessellationShaderBarrier.cpp:504"
|
||||
KHR-GLES31.core.tessellation_shader.tessellation_shader_tc_barriers.barrier_guarded_read_calls,Fail
|
||||
|
|
|
|||
|
|
@ -108,21 +108,23 @@ emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
|
|||
}
|
||||
|
||||
static void
|
||||
emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit,
|
||||
emit_tess_bos(struct fd_screen *screen, struct fd_ringbuffer *ring,
|
||||
struct ir3_shader_variant *s) assert_dt
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
const struct ir3_const_state *const_state = ir3_const_state(s);
|
||||
const unsigned regid = const_state->offsets.primitive_param * 4 + 4;
|
||||
uint32_t dwords = 16;
|
||||
uint32_t dwords = 8;
|
||||
|
||||
OUT_PKT7(ring, fd6_stage2opcode(s->type), 3);
|
||||
OUT_PKT7(ring, fd6_stage2opcode(s->type), 7);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4));
|
||||
OUT_RB(ring, ctx->batch->tess_addrs_constobj);
|
||||
OUT_RING(ring, 0);
|
||||
OUT_RING(ring, 0);
|
||||
OUT_RELOC(ring, screen->tess_bo, FD6_TESS_FACTOR_SIZE, 0, 0);
|
||||
OUT_RELOC(ring, screen->tess_bo, 0, 0, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -166,7 +168,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
|
|||
|
||||
emit_stage_tess_consts(constobj, emit->hs, hs_params,
|
||||
ARRAY_SIZE(hs_params));
|
||||
emit_tess_bos(constobj, emit, emit->hs);
|
||||
emit_tess_bos(ctx->screen, constobj, emit->hs);
|
||||
|
||||
if (emit->gs)
|
||||
num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
|
||||
|
|
@ -179,7 +181,7 @@ fd6_build_tess_consts(struct fd6_emit *emit)
|
|||
|
||||
emit_stage_tess_consts(constobj, emit->ds, ds_params,
|
||||
ARRAY_SIZE(ds_params));
|
||||
emit_tess_bos(constobj, emit, emit->ds);
|
||||
emit_tess_bos(ctx->screen, constobj, emit->ds);
|
||||
}
|
||||
|
||||
if (emit->gs) {
|
||||
|
|
|
|||
|
|
@ -261,41 +261,16 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
|
|||
draw0.prim_type = DI_PT_PATCHES0 + ctx->patch_vertices;
|
||||
draw0.tess_enable = true;
|
||||
|
||||
const unsigned max_count = 2048;
|
||||
unsigned count;
|
||||
|
||||
/**
|
||||
* We can cap tessparam/tessfactor buffer sizes at the sub-draw
|
||||
* limit. But in the indirect-draw case we must assume the worst.
|
||||
*/
|
||||
if (indirect && indirect->buffer) {
|
||||
count = ALIGN_NPOT(max_count, ctx->patch_vertices);
|
||||
} else {
|
||||
count = MIN2(max_count, draw->count);
|
||||
count = ALIGN_NPOT(count, ctx->patch_vertices);
|
||||
}
|
||||
/* maximum number of patches that can fit in tess factor/param buffers */
|
||||
uint32_t subdraw_size = MIN2(FD6_TESS_FACTOR_SIZE / factor_stride,
|
||||
FD6_TESS_PARAM_SIZE / (emit.hs->output_size * 4));
|
||||
/* convert from # of patches to draw count */
|
||||
subdraw_size *= ctx->patch_vertices;
|
||||
|
||||
OUT_PKT7(ring, CP_SET_SUBDRAW_SIZE, 1);
|
||||
OUT_RING(ring, count);
|
||||
OUT_RING(ring, subdraw_size);
|
||||
|
||||
ctx->batch->tessellation = true;
|
||||
ctx->batch->tessparam_size =
|
||||
MAX2(ctx->batch->tessparam_size, emit.hs->output_size * 4 * count);
|
||||
ctx->batch->tessfactor_size =
|
||||
MAX2(ctx->batch->tessfactor_size, factor_stride * count);
|
||||
|
||||
if (!ctx->batch->tess_addrs_constobj) {
|
||||
/* Reserve space for the bo address - we'll write them later in
|
||||
* setup_tess_buffers(). We need 2 bo addresses, but indirect
|
||||
* constant upload needs at least 4 vec4s.
|
||||
*/
|
||||
unsigned size = 4 * 16;
|
||||
|
||||
ctx->batch->tess_addrs_constobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, size, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
ctx->batch->tess_addrs_constobj->cur += size;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t index_start = info->index_size ? draw->index_bias : draw->start;
|
||||
|
|
|
|||
|
|
@ -1339,6 +1339,15 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
|
|||
OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
/* This happens after all drawing has been emitted to the draw CS, so we know
|
||||
* whether we need the tess BO pointers.
|
||||
*/
|
||||
if (batch->tessellation) {
|
||||
assert(screen->tess_bo);
|
||||
OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
|
||||
OUT_RELOC(ring, screen->tess_bo, 0, 0, 0);
|
||||
}
|
||||
|
||||
if (!batch->nondraw) {
|
||||
trace_end_state_restore(&batch->trace, ring);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1552,25 +1552,6 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
|
|||
trace_end_clear_restore(&batch->trace, ring);
|
||||
}
|
||||
|
||||
static void
|
||||
setup_tess_buffers(struct fd_batch *batch, struct fd_ringbuffer *ring)
|
||||
{
|
||||
struct fd_context *ctx = batch->ctx;
|
||||
|
||||
batch->tessfactor_bo = fd_bo_new(ctx->screen->dev, batch->tessfactor_size,
|
||||
0, "tessfactor");
|
||||
|
||||
batch->tessparam_bo = fd_bo_new(ctx->screen->dev, batch->tessparam_size,
|
||||
0, "tessparam");
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2);
|
||||
OUT_RELOC(ring, batch->tessfactor_bo, 0, 0, 0);
|
||||
|
||||
batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start;
|
||||
OUT_RELOC(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0);
|
||||
OUT_RELOC(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
|
||||
{
|
||||
|
|
@ -1612,9 +1593,6 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
|
|||
OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
|
||||
emit_marker6(ring, 7);
|
||||
|
||||
if (batch->tessellation)
|
||||
setup_tess_buffers(batch, ring);
|
||||
|
||||
OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
|
||||
OUT_RING(ring, 0x0);
|
||||
|
||||
|
|
|
|||
|
|
@ -1206,6 +1206,7 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
|
|||
const struct ir3_shader_key *key) in_dt
|
||||
{
|
||||
struct fd_context *ctx = fd_context(data);
|
||||
struct fd_screen *screen = ctx->screen;
|
||||
struct fd6_program_state *state = CALLOC_STRUCT(fd6_program_state);
|
||||
|
||||
tc_assert_driver_thread(ctx->tc);
|
||||
|
|
@ -1233,6 +1234,19 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs,
|
|||
}
|
||||
#endif
|
||||
|
||||
if (hs) {
|
||||
/* Allocate the fixed-size tess factor BO globally on the screen. This
|
||||
* lets the program (which ideally we would have shared across contexts,
|
||||
* though the current ir3_cache impl doesn't do that) bake in the
|
||||
* addresses.
|
||||
*/
|
||||
fd_screen_lock(screen);
|
||||
if (!screen->tess_bo)
|
||||
screen->tess_bo =
|
||||
fd_bo_new(screen->dev, FD6_TESS_BO_SIZE, 0, "tessfactor");
|
||||
fd_screen_unlock(screen);
|
||||
}
|
||||
|
||||
setup_config_stateobj(ctx, state);
|
||||
setup_stateobj(state->binning_stateobj, ctx, state, key, true);
|
||||
setup_stateobj(state->stateobj, ctx, state, key, false);
|
||||
|
|
|
|||
|
|
@ -178,12 +178,6 @@ cleanup_submit(struct fd_batch *batch)
|
|||
batch->tile_fini = NULL;
|
||||
}
|
||||
|
||||
if (batch->tessellation) {
|
||||
fd_bo_del(batch->tessfactor_bo);
|
||||
fd_bo_del(batch->tessparam_bo);
|
||||
fd_ringbuffer_del(batch->tess_addrs_constobj);
|
||||
}
|
||||
|
||||
fd_submit_del(batch->submit);
|
||||
batch->submit = NULL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -255,18 +255,6 @@ struct fd_batch {
|
|||
|
||||
/** set of dependent batches.. holds refs to dependent batches: */
|
||||
uint32_t dependents_mask;
|
||||
|
||||
/* Buffer for tessellation engine input
|
||||
*/
|
||||
struct fd_bo *tessfactor_bo;
|
||||
uint32_t tessfactor_size;
|
||||
|
||||
/* Buffer for passing parameters between TCS and TES
|
||||
*/
|
||||
struct fd_bo *tessparam_bo;
|
||||
uint32_t tessparam_size;
|
||||
|
||||
struct fd_ringbuffer *tess_addrs_constobj;
|
||||
};
|
||||
|
||||
struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);
|
||||
|
|
|
|||
|
|
@ -144,6 +144,9 @@ fd_screen_destroy(struct pipe_screen *pscreen)
|
|||
{
|
||||
struct fd_screen *screen = fd_screen(pscreen);
|
||||
|
||||
if (screen->tess_bo)
|
||||
fd_bo_del(screen->tess_bo);
|
||||
|
||||
if (screen->pipe)
|
||||
fd_pipe_del(screen->pipe);
|
||||
|
||||
|
|
|
|||
|
|
@ -149,6 +149,12 @@ struct fd_screen {
|
|||
|
||||
struct renderonly *ro;
|
||||
|
||||
/* the blob seems to always use 8K factor and 128K param sizes, copy them */
|
||||
#define FD6_TESS_FACTOR_SIZE (8 * 1024)
|
||||
#define FD6_TESS_PARAM_SIZE (128 * 1024)
|
||||
#define FD6_TESS_BO_SIZE (FD6_TESS_FACTOR_SIZE + FD6_TESS_PARAM_SIZE)
|
||||
struct fd_bo *tess_bo;
|
||||
|
||||
/* table with PIPE_PRIM_MAX+1 entries mapping PIPE_PRIM_x to
|
||||
* DI_PT_x value to use for draw initiator. There are some
|
||||
* slight differences between generations.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue