diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt index 07962b440b5..d1ac6750e2a 100644 --- a/src/freedreno/ci/freedreno-a630-fails.txt +++ b/src/freedreno/ci/freedreno-a630-fails.txt @@ -17,10 +17,6 @@ KHR-GLES31.core.geometry_shader.layered_framebuffer.depth_support,Fail KHR-GLES31.core.geometry_shader.layered_framebuffer.stencil_support,Fail -# " Pixel data comparison failed; expected: (0.1, 0.2, 0.3, 0.4) rendered: (0, 0, 0, 0) epsilon: 0.00392157 -# Pixel data comparison failed at esextcTessellationShaderPoints.cpp:597" -KHR-GLES31.core.tessellation_shader.tessellation_shader_point_mode.point_rendering,Fail - # "Invalid value returned: expected:[1, 1, 1, 1] retrieved: [0, 0, 0, 0 # Invalid rendering result at esextcTessellationShaderBarrier.cpp:504" KHR-GLES31.core.tessellation_shader.tessellation_shader_tc_barriers.barrier_guarded_read_calls,Fail diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c index dc8343f8814..f774d52361a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -108,21 +108,23 @@ emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, } static void -emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, +emit_tess_bos(struct fd_screen *screen, struct fd_ringbuffer *ring, struct ir3_shader_variant *s) assert_dt { - struct fd_context *ctx = emit->ctx; const struct ir3_const_state *const_state = ir3_const_state(s); const unsigned regid = const_state->offsets.primitive_param * 4 + 4; - uint32_t dwords = 16; + uint32_t dwords = 8; - OUT_PKT7(ring, fd6_stage2opcode(s->type), 3); + OUT_PKT7(ring, fd6_stage2opcode(s->type), 7); OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) | CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) | CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4)); - OUT_RB(ring, ctx->batch->tess_addrs_constobj); + OUT_RING(ring, 0); + OUT_RING(ring, 0); + OUT_RELOC(ring, screen->tess_bo, FD6_TESS_FACTOR_SIZE, 0, 0); + OUT_RELOC(ring, screen->tess_bo, 0, 0, 0); } static void @@ -166,7 +168,7 @@ fd6_build_tess_consts(struct fd6_emit *emit) emit_stage_tess_consts(constobj, emit->hs, hs_params, ARRAY_SIZE(hs_params)); - emit_tess_bos(constobj, emit, emit->hs); + emit_tess_bos(ctx->screen, constobj, emit->hs); if (emit->gs) num_vertices = emit->gs->shader->nir->info.gs.vertices_in; @@ -179,7 +181,7 @@ fd6_build_tess_consts(struct fd6_emit *emit) emit_stage_tess_consts(constobj, emit->ds, ds_params, ARRAY_SIZE(ds_params)); - emit_tess_bos(constobj, emit, emit->ds); + emit_tess_bos(ctx->screen, constobj, emit->ds); } if (emit->gs) { diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index a79f6a5800e..5d7e2fb9997 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -261,41 +261,16 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, draw0.prim_type = DI_PT_PATCHES0 + ctx->patch_vertices; draw0.tess_enable = true; - const unsigned max_count = 2048; - unsigned count; - - /** - * We can cap tessparam/tessfactor buffer sizes at the sub-draw - * limit. But in the indirect-draw case we must assume the worst. - */ - if (indirect && indirect->buffer) { - count = ALIGN_NPOT(max_count, ctx->patch_vertices); - } else { - count = MIN2(max_count, draw->count); - count = ALIGN_NPOT(count, ctx->patch_vertices); - } + /* maximum number of patches that can fit in tess factor/param buffers */ + uint32_t subdraw_size = MIN2(FD6_TESS_FACTOR_SIZE / factor_stride, + FD6_TESS_PARAM_SIZE / (emit.hs->output_size * 4)); + /* convert from # of patches to draw count */ + subdraw_size *= ctx->patch_vertices; OUT_PKT7(ring, CP_SET_SUBDRAW_SIZE, 1); - OUT_RING(ring, count); + OUT_RING(ring, subdraw_size); ctx->batch->tessellation = true; - ctx->batch->tessparam_size = - MAX2(ctx->batch->tessparam_size, emit.hs->output_size * 4 * count); - ctx->batch->tessfactor_size = - MAX2(ctx->batch->tessfactor_size, factor_stride * count); - - if (!ctx->batch->tess_addrs_constobj) { - /* Reserve space for the bo address - we'll write them later in - * setup_tess_buffers(). We need 2 bo address, but indirect - * constant upload needs at least 4 vec4s. - */ - unsigned size = 4 * 16; - - ctx->batch->tess_addrs_constobj = fd_submit_new_ringbuffer( - ctx->batch->submit, size, FD_RINGBUFFER_STREAMING); - - ctx->batch->tess_addrs_constobj->cur += size; - } } uint32_t index_start = info->index_size ? draw->index_bias : draw->start; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index f7590222564..21ef07daca9 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -1339,6 +1339,15 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1); OUT_RING(ring, 0x00000000); + /* This happens after all drawing has been emitted to the draw CS, so we know + * whether we need the tess BO pointers. + */ + if (batch->tessellation) { + assert(screen->tess_bo); + OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2); + OUT_RELOC(ring, screen->tess_bo, 0, 0, 0); + } + if (!batch->nondraw) { trace_end_state_restore(&batch->trace, ring); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index 2f626114e7e..ced8d123c88 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -1552,25 +1552,6 @@ emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt trace_end_clear_restore(&batch->trace, ring); } -static void -setup_tess_buffers(struct fd_batch *batch, struct fd_ringbuffer *ring) -{ - struct fd_context *ctx = batch->ctx; - - batch->tessfactor_bo = fd_bo_new(ctx->screen->dev, batch->tessfactor_size, - 0, "tessfactor"); - - batch->tessparam_bo = fd_bo_new(ctx->screen->dev, batch->tessparam_size, - 0, "tessparam"); - - OUT_PKT4(ring, REG_A6XX_PC_TESSFACTOR_ADDR, 2); - OUT_RELOC(ring, batch->tessfactor_bo, 0, 0, 0); - - batch->tess_addrs_constobj->cur = batch->tess_addrs_constobj->start; - OUT_RELOC(batch->tess_addrs_constobj, batch->tessparam_bo, 0, 0, 0); - OUT_RELOC(batch->tess_addrs_constobj, batch->tessfactor_bo, 0, 0, 0); -} - static void fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt { @@ -1612,9 +1593,6 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS)); emit_marker6(ring, 7); - if (batch->tessellation) - setup_tess_buffers(batch, ring); - OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x0); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index e393317a72c..e568f6a5f14 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -1206,6 +1206,7 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs, const struct ir3_shader_key *key) in_dt { struct fd_context *ctx = fd_context(data); + struct fd_screen *screen = ctx->screen; struct fd6_program_state *state = CALLOC_STRUCT(fd6_program_state); tc_assert_driver_thread(ctx->tc); @@ -1233,6 +1234,19 @@ fd6_program_create(void *data, struct ir3_shader_variant *bs, } #endif + if (hs) { + /* Allocate the fixed-size tess factor BO globally on the screen. This + * lets the program (which ideally we would have shared across contexts, + * though the current ir3_cache impl doesn't do that) bake in the + * addresses. + */ + fd_screen_lock(screen); + if (!screen->tess_bo) + screen->tess_bo = + fd_bo_new(screen->dev, FD6_TESS_BO_SIZE, 0, "tessfactor"); + fd_screen_unlock(screen); + } + setup_config_stateobj(ctx, state); setup_stateobj(state->binning_stateobj, ctx, state, key, true); setup_stateobj(state->stateobj, ctx, state, key, false); diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 6bc1e06969b..9dc4f07b4c0 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -178,12 +178,6 @@ cleanup_submit(struct fd_batch *batch) batch->tile_fini = NULL; } - if (batch->tessellation) { - fd_bo_del(batch->tessfactor_bo); - fd_bo_del(batch->tessparam_bo); - fd_ringbuffer_del(batch->tess_addrs_constobj); - } - fd_submit_del(batch->submit); batch->submit = NULL; } diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index f85ff823852..7c8b9e719a1 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -255,18 +255,6 @@ struct fd_batch { /** set of dependent batches.. holds refs to dependent batches: */ uint32_t dependents_mask; - - /* Buffer for tessellation engine input - */ - struct fd_bo *tessfactor_bo; - uint32_t tessfactor_size; - - /* Buffer for passing parameters between TCS and TES - */ - struct fd_bo *tessparam_bo; - uint32_t tessparam_size; - - struct fd_ringbuffer *tess_addrs_constobj; }; struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 7141db9138a..ea0ef8ea8df 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -144,6 +144,9 @@ fd_screen_destroy(struct pipe_screen *pscreen) { struct fd_screen *screen = fd_screen(pscreen); + if (screen->tess_bo) + fd_bo_del(screen->tess_bo); + if (screen->pipe) fd_pipe_del(screen->pipe); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index ca15d3e4a5f..f3c8bb9b535 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -149,6 +149,12 @@ struct fd_screen { struct renderonly *ro; + /* the blob seems to always use 8K factor and 128K param sizes, copy them */ +#define FD6_TESS_FACTOR_SIZE (8 * 1024) +#define FD6_TESS_PARAM_SIZE (128 * 1024) +#define FD6_TESS_BO_SIZE (FD6_TESS_FACTOR_SIZE + FD6_TESS_PARAM_SIZE) + struct fd_bo *tess_bo; + /* table with PIPE_PRIM_MAX+1 entries mapping PIPE_PRIM_x to * DI_PT_x value to use for draw initiator. There are some * slight differences between generation.