util/tc: implement renderpass tracking
this allows tc to track metadata for framebuffer attachments so that
drivers can optimize their renderpasses

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19077>
parent 42fafd2f51
commit 07017aa137

2 changed files with 390 additions and 21 deletions
src/gallium/auxiliary/util/u_threaded_context.c

@@ -116,6 +116,103 @@ tc_clear_driver_thread(struct threaded_context *tc)
#endif
}

/* ensure the batch's array of renderpass data is large enough for the current index */
static void
tc_batch_renderpass_infos_resize(struct tc_batch *batch)
{
   unsigned size = batch->renderpass_infos.capacity;
   unsigned cur_num = batch->renderpass_info_idx;

   if (size / sizeof(struct tc_renderpass_info) > cur_num)
      return;

   if (!util_dynarray_resize(&batch->renderpass_infos, struct tc_renderpass_info, cur_num + 10))
      mesa_loge("tc: memory alloc fail!");

   if (size != batch->renderpass_infos.capacity) {
      /* zero new allocation region */
      uint8_t *data = batch->renderpass_infos.data;
      memset(data + size, 0, batch->renderpass_infos.capacity - size);
      unsigned start = size / sizeof(struct tc_renderpass_info);
      unsigned count = (batch->renderpass_infos.capacity - size) /
                       sizeof(struct tc_renderpass_info);
      struct tc_renderpass_info *infos = batch->renderpass_infos.data;
      for (unsigned i = 0; i < count; i++)
         util_queue_fence_init(&infos[start + i].ready);
   }
}

/* signal that the renderpass info is "ready" for use by drivers and will no longer be updated */
static void
tc_signal_renderpass_info_ready(struct threaded_context *tc)
{
   if (tc->renderpass_info_recording &&
       !util_queue_fence_is_signalled(&tc->renderpass_info_recording->ready))
      util_queue_fence_signal(&tc->renderpass_info_recording->ready);
}

/* increment the current renderpass info struct for recording
 * 'full_copy' is used for preserving data across non-blocking tc batch flushes
 */
static void
tc_batch_increment_renderpass_info(struct threaded_context *tc, bool full_copy)
{
   struct tc_batch *batch = &tc->batch_slots[tc->next];
   struct tc_renderpass_info *tc_info = batch->renderpass_infos.data;

   /* signal existing info since it will not be used anymore */
   tc_signal_renderpass_info_ready(tc);
   /* increment rp info and initialize it */
   batch->renderpass_info_idx++;
   tc_batch_renderpass_infos_resize(batch);
   tc_info = batch->renderpass_infos.data;

   if (full_copy) {
      /* copy the previous data in its entirety: this is still the same renderpass */
      if (tc->renderpass_info_recording)
         tc_info[batch->renderpass_info_idx].data = tc->renderpass_info_recording->data;
      else
         tc_info[batch->renderpass_info_idx].data = 0;
   } else {
      /* selectively copy: only the CSO metadata is copied, and a new framebuffer state will be added later */
      tc_info[batch->renderpass_info_idx].data = 0;
      if (tc->renderpass_info_recording)
         tc_info[batch->renderpass_info_idx].data16[2] = tc->renderpass_info_recording->data16[2];
   }

   util_queue_fence_reset(&tc_info[batch->renderpass_info_idx].ready);
   assert(tc->renderpass_info_recording != &tc_info[batch->renderpass_info_idx]);
   /* this is now the current recording renderpass info */
   tc->renderpass_info_recording = &tc_info[batch->renderpass_info_idx];
}

static ALWAYS_INLINE struct tc_renderpass_info *
tc_get_renderpass_info(struct threaded_context *tc)
{
   return tc->renderpass_info_recording;
}

/* update metadata at draw time */
static void
tc_parse_draw(struct threaded_context *tc)
{
   struct tc_renderpass_info *info = tc_get_renderpass_info(tc);

   if (info) {
      /* all buffers that aren't cleared are considered loaded */
      info->cbuf_load |= ~info->cbuf_clear;
      if (!info->zsbuf_clear)
         info->zsbuf_load = true;
      /* previous invalidates are no longer relevant */
      info->cbuf_invalidate = 0;
      info->zsbuf_invalidate = false;
      info->has_draw = true;
   }

   tc->in_renderpass = true;
   tc->seen_fb_state = true;
}

static void *
to_call_check(void *ptr, unsigned num_slots)
{
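Each tc_renderpass_info is guarded by its ready fence: util_queue_fence_init leaves it signaled, tc_batch_increment_renderpass_info resets it when recording begins, and tc_signal_renderpass_info_ready signals it once tc will no longer touch the struct. A minimal sketch of that produce/consume lifecycle, using Mesa's util/u_queue.h fence API (driver_use_info is a hypothetical consumer):

   struct tc_renderpass_info info;
   util_queue_fence_init(&info.ready);    /* fence starts out signaled */

   /* application thread: begin recording a new renderpass */
   util_queue_fence_reset(&info.ready);   /* mark the info as in-flight */
   info.data = 0;                         /* clear the tracked metadata */

   /* ... tc records clears, draws, and invalidates into info ... */

   util_queue_fence_signal(&info.ready);  /* recording done; publish */

   /* driver thread: block until the metadata is stable, then read it */
   util_queue_fence_wait(&info.ready);
   driver_use_info(&info);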
@@ -194,18 +291,13 @@ tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
#define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
   offsetof(struct pipe_draw_info, min_index)

static void
tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
ALWAYS_INLINE static void
batch_execute(struct tc_batch *batch, struct pipe_context *pipe, uint64_t *last, bool parsing)
{
   struct tc_batch *batch = job;
   struct pipe_context *pipe = batch->tc->pipe;
   uint64_t *last = &batch->slots[batch->num_total_slots];

   tc_batch_check(batch);
   tc_set_driver_thread(batch->tc);

   assert(!batch->token);

   /* if the framebuffer state is persisting from a previous batch,
    * begin incrementing renderpass info on the first set_framebuffer_state call
    */
   bool first = !batch->first_set_fb;
   for (uint64_t *iter = batch->slots; iter != last;) {
      struct tc_call_base *call = (struct tc_call_base *)iter;
@@ -218,7 +310,50 @@ tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
      TC_TRACE_SCOPE(call->call_id);

      iter += execute_func[call->call_id](pipe, call, last);

      if (parsing) {
         if (call->call_id == TC_CALL_flush) {
            /* always increment renderpass info for non-deferred flushes */
            batch->tc->renderpass_info++;
            /* if a flush happens, renderpass info is always incremented after */
            first = false;
         } else if (call->call_id == TC_CALL_set_framebuffer_state) {
            /* the renderpass info pointer is already set at the start of the batch,
             * so don't increment on the first set_framebuffer_state call
             */
            if (!first)
               batch->tc->renderpass_info++;
            first = false;
         } else if (call->call_id >= TC_CALL_draw_single &&
                    call->call_id <= TC_CALL_draw_vstate_multi) {
            /* if a draw happens before a set_framebuffer_state on this batch,
             * begin incrementing renderpass data
             */
            first = false;
         }
      }
   }
}

static void
tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
{
   struct tc_batch *batch = job;
   struct pipe_context *pipe = batch->tc->pipe;
   uint64_t *last = &batch->slots[batch->num_total_slots];

   tc_batch_check(batch);
   tc_set_driver_thread(batch->tc);

   assert(!batch->token);

   /* setup renderpass info */
   batch->tc->renderpass_info = batch->renderpass_infos.data;

   if (batch->tc->options.parse_renderpass_info)
      batch_execute(batch, pipe, last, true);
   else
      batch_execute(batch, pipe, last, false);

   /* Add the fence to the list of fences for the driver to signal at the next
    * flush, which we use for tracking which buffers are referenced by
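Note how tc_batch_execute passes a literal true or false for the parsing flag: because batch_execute is ALWAYS_INLINE, the compiler emits two specialized copies of the execution loop and folds the if (parsing) checks away, so the bookkeeping costs nothing when parse_renderpass_info is off. A standalone illustration of the same trick (none of this is tc code):

   #include <stdbool.h>
   #include <stdio.h>

   static inline __attribute__((always_inline)) int
   scale(int x, bool verbose)
   {
      if (verbose)   /* constant-folded at each inlined call site */
         printf("scaling %d\n", x);
      return x * 2;
   }

   int main(void)
   {
      int a = scale(21, true);    /* specialized variant with logging */
      int b = scale(21, false);   /* specialized variant without */
      return a == b ? 0 : 1;
   }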
@@ -247,6 +382,7 @@ tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
   tc_batch_check(batch);
   batch->num_total_slots = 0;
   batch->last_mergeable_call = NULL;
   batch->first_set_fb = false;
}

static void
@@ -267,7 +403,7 @@ tc_begin_next_buffer_list(struct threaded_context *tc)
}

static void
tc_batch_flush(struct threaded_context *tc)
tc_batch_flush(struct threaded_context *tc, bool full_copy)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];

@@ -281,12 +417,22 @@ tc_batch_flush(struct threaded_context *tc)
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }
   /* reset renderpass info index for subsequent use */
   next->renderpass_info_idx = -1;

   util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
                      NULL, 0);
   tc->last = tc->next;
   tc->next = (tc->next + 1) % TC_MAX_BATCHES;
   tc_begin_next_buffer_list(tc);

   /* always increment renderpass info on batch flush;
    * renderpass info can only be accessed by its owner batch during execution
    */
   if (tc->renderpass_info_recording) {
      tc->batch_slots[tc->next].first_set_fb = full_copy;
      tc_batch_increment_renderpass_info(tc, full_copy);
   }
}

/* This is the function that adds variable-sized calls into the current
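The full_copy flag controls how much metadata survives into the next batch's info: a mid-renderpass batch flush must preserve everything, while a true renderpass boundary keeps only the CSO-derived bits. In terms of the union views declared in the header below, the two paths in tc_batch_increment_renderpass_info amount to:

   /* full_copy: the same renderpass continues in the next batch */
   new_info->data = old_info->data;            /* all 64 bits carried over */

   /* !full_copy: a new renderpass begins; only CSO state (fbfetch and
    * shader/DSA zsbuf usage) persists, fb info is rebuilt from scratch */
   new_info->data = 0;
   new_info->data16[2] = old_info->data16[2];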
@@ -303,7 +449,8 @@ tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
   tc_debug_check(tc);

   if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
      tc_batch_flush(tc);
      /* copy existing renderpass info during flush */
      tc_batch_flush(tc, true);
      next = &tc->batch_slots[tc->next];
      tc_assert(next->num_total_slots == 0);
      tc_assert(next->last_mergeable_call == NULL);
@@ -406,6 +553,8 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char

   tc_debug_check(tc);

   tc_signal_renderpass_info_ready(tc);

   /* Only wait for queued calls... */
   if (!util_queue_fence_is_signalled(&last->fence)) {
      util_queue_fence_wait(&last->fence);
@@ -438,6 +587,17 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char

   tc_debug_check(tc);

   if (tc->options.parse_renderpass_info) {
      int renderpass_info_idx = next->renderpass_info_idx;
      if (renderpass_info_idx > 0) {
         next->renderpass_info_idx = -1;
         tc_batch_increment_renderpass_info(tc, false);
      } else if (tc->renderpass_info_recording->has_draw) {
         tc->renderpass_info_recording->data32[0] = 0;
      }
      tc->seen_fb_state = false;
   }

   MESA_TRACE_END();
}

@@ -466,7 +626,7 @@ threaded_context_flush(struct pipe_context *_pipe,
    * running. That should be better for cache locality.
    */
   if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
      tc_batch_flush(tc);
      tc_batch_flush(tc, false);
   else
      tc_sync(token->tc);
}
@@ -1097,11 +1257,33 @@ TC_CSO_WHOLE(blend)
TC_CSO_WHOLE(rasterizer)
TC_CSO_CREATE(depth_stencil_alpha, depth_stencil_alpha)
TC_CSO_BIND(depth_stencil_alpha,
   if (param && tc->options.parse_renderpass_info) {
      /* dsa info is only ever added during a renderpass;
       * changes outside of a renderpass reset the data
       */
      if (!tc->in_renderpass) {
         tc_get_renderpass_info(tc)->zsbuf_write_dsa = 0;
         tc_get_renderpass_info(tc)->zsbuf_read_dsa = 0;
      }
      /* let the driver parse its own state */
      tc->options.dsa_parse(param, tc_get_renderpass_info(tc));
   }
)
TC_CSO_DELETE(depth_stencil_alpha)
TC_CSO_WHOLE(compute)
TC_CSO_CREATE(fs, shader)
TC_CSO_BIND(fs,
   if (param && tc->options.parse_renderpass_info) {
      /* fs info is only ever added during a renderpass;
       * changes outside of a renderpass reset the data
       */
      if (!tc->in_renderpass) {
         tc_get_renderpass_info(tc)->cbuf_fbfetch = 0;
         tc_get_renderpass_info(tc)->zsbuf_write_fs = 0;
      }
      /* let the driver parse its own state */
      tc->options.fs_parse(param, tc_get_renderpass_info(tc));
   }
)
TC_CSO_DELETE(fs)
TC_CSO_SHADER(vs)
@@ -1199,10 +1381,48 @@ tc_set_framebuffer_state(struct pipe_context *_pipe,
   p->state.layers = fb->layers;
   p->state.nr_cbufs = nr_cbufs;

   for (unsigned i = 0; i < nr_cbufs; i++) {
      p->state.cbufs[i] = NULL;
      pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);

   if (tc->options.parse_renderpass_info) {
      /* store existing zsbuf data for possible persistence */
      uint8_t zsbuf = tc->renderpass_info_recording->has_draw ?
                      0 :
                      tc->renderpass_info_recording->data8[3];
      bool zsbuf_changed = tc->fb_resources[PIPE_MAX_COLOR_BUFS] !=
                           (fb->zsbuf ? fb->zsbuf->texture : NULL);

      for (unsigned i = 0; i < nr_cbufs; i++) {
         p->state.cbufs[i] = NULL;
         pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
         /* full tracking requires storing the fb attachment resources */
         tc->fb_resources[i] = fb->cbufs[i] ? fb->cbufs[i]->texture : NULL;
      }
      memset(&tc->fb_resources[nr_cbufs], 0,
             sizeof(void*) * (PIPE_MAX_COLOR_BUFS - nr_cbufs));

      tc->fb_resources[PIPE_MAX_COLOR_BUFS] = fb->zsbuf ? fb->zsbuf->texture : NULL;
      if (tc->seen_fb_state) {
         /* this is the end of a renderpass, so increment the renderpass info */
         tc_batch_increment_renderpass_info(tc, false);
         /* if zsbuf hasn't changed (i.e., possibly just adding a color buffer):
          * keep zsbuf usage data
          */
         if (!zsbuf_changed)
            tc->renderpass_info_recording->data8[3] = zsbuf;
      } else {
         /* this is the first time a set_framebuffer_state call is triggered;
          * just increment the index and keep using the existing info for recording
          */
         tc->batch_slots[tc->next].renderpass_info_idx = 0;
      }
      /* future fb state changes will increment the index */
      tc->seen_fb_state = true;
   } else {
      for (unsigned i = 0; i < nr_cbufs; i++) {
         p->state.cbufs[i] = NULL;
         pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
      }
   }
   tc->in_renderpass = false;
   p->state.zsbuf = NULL;
   pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
}
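Taken together, the boundary rules are: the first set_framebuffer_state after a flush keeps using the info that is already recording, and every subsequent one (once seen_fb_state is set) ends the current renderpass and starts a new info. A worked sequence under those rules (the info indices are illustrative):

   set_framebuffer_state(fb A)  -> info #0 keeps recording (first fb state)
   clear(PIPE_CLEAR_COLOR0)     -> info #0: cbuf_clear |= 0x1
   draw(...)                    -> info #0: cbuf_load |= ~cbuf_clear, has_draw = true
   set_framebuffer_state(fb B)  -> info #1 (seen_fb_state was set: new renderpass)
   draw(...)                    -> info #1: loads recorded against fb B
   flush()                      -> current info signaled ready for the driver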
@@ -3185,6 +3405,8 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
   struct pipe_screen *screen = pipe->screen;
   bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);

   tc->in_renderpass = false;

   if (async && tc->options.create_fence) {
      if (fence) {
         struct tc_batch *next = &tc->batch_slots[tc->next];
@@ -3215,17 +3437,25 @@ tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
         p->fence = fence ? *fence : NULL;
         p->flags = flags | TC_FLUSH_ASYNC;

         if (!(flags & PIPE_FLUSH_DEFERRED))
            tc_batch_flush(tc);
         if (!(flags & PIPE_FLUSH_DEFERRED)) {
            /* non-deferred async flushes indicate completion of existing renderpass info */
            tc_signal_renderpass_info_ready(tc);
            tc_batch_flush(tc, false);
            tc->seen_fb_state = false;
         }

         return;
      }
   }

out_of_memory:
   /* renderpass info is signaled during sync */
   tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
                   flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");

   if (!(flags & PIPE_FLUSH_DEFERRED))
   if (!(flags & PIPE_FLUSH_DEFERRED)) {
      tc_flush_queries(tc);
      tc->seen_fb_state = false;
   }
   tc_set_driver_thread(tc);
   pipe->flush(pipe, fence, flags);
   tc_clear_driver_thread(tc);
@@ -3440,6 +3670,7 @@ tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
   struct threaded_context *tc = threaded_context(_pipe);
   unsigned index_size = info->index_size;
   bool has_user_indices = info->has_user_indices;
   tc_parse_draw(tc);

   if (unlikely(indirect)) {
      assert(!has_user_indices);
@@ -3759,6 +3990,7 @@ tc_draw_vertex_state(struct pipe_context *_pipe,
                     unsigned num_draws)
{
   struct threaded_context *tc = threaded_context(_pipe);
   tc_parse_draw(tc);

   if (num_draws == 1) {
      /* Single draw. */
@@ -4053,6 +4285,18 @@ tc_invalidate_resource(struct pipe_context *_pipe,
   struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
                                               tc_resource_call);
   tc_set_resource_reference(&call->resource, resource);

   struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
   if (info) {
      if (tc->fb_resources[PIPE_MAX_COLOR_BUFS] == resource) {
         info->zsbuf_invalidate = true;
      } else {
         for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
            if (tc->fb_resources[i] == resource)
               info->cbuf_invalidate |= BITFIELD_BIT(i);
         }
      }
   }
}

struct tc_clear {
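An invalidate only affects the renderpass info when the resource is currently bound: tc->fb_resources[] is scanned and the matching attachment bit is recorded. For example, if the invalidated resource is bound as color attachment 1:

   info->cbuf_invalidate |= BITFIELD_BIT(1);   /* cbuf_invalidate == 0x2 */

   /* if a draw follows in the same renderpass, tc_parse_draw resets
    * cbuf_invalidate to 0, since the contents are live after all */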
@@ -4083,8 +4327,23 @@ tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor
   struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);

   p->buffers = buffers;
   if (scissor_state)
   if (scissor_state) {
      p->scissor_state = *scissor_state;
      struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
      /* partial clear info is useful for drivers to know whether any zs writes occur;
       * drivers are responsible for optimizing partial clear -> full clear
       */
      if (info && buffers & PIPE_CLEAR_DEPTHSTENCIL)
         info->zsbuf_clear_partial |= !info->zsbuf_clear;
   } else {
      struct tc_renderpass_info *info = tc_get_renderpass_info(tc);
      if (info) {
         /* full clears use a different load operation, but are only valid if draws haven't occurred yet */
         info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;
         if (buffers & PIPE_CLEAR_DEPTHSTENCIL && !info->zsbuf_load && !info->zsbuf_clear_partial)
            info->zsbuf_clear = true;
      }
   }
   p->scissor_state_set = !!scissor_state;
   p->color = *color;
   p->depth = depth;
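The (buffers >> 2) shift works because of how gallium lays out its clear bits: PIPE_CLEAR_DEPTH and PIPE_CLEAR_STENCIL occupy bits 0-1 and PIPE_CLEAR_COLOR0 starts at bit 2, so shifting by two aligns PIPE_CLEAR_COLORn with cbuf index n. A worked case, assuming nothing has been drawn yet:

   unsigned buffers = PIPE_CLEAR_COLOR0 | PIPE_CLEAR_COLOR2;   /* bits 2 and 4 */
   /* (buffers >> 2) == 0x5 and cbuf_load == 0, so nothing is masked off */
   info->cbuf_clear |= (buffers >> 2) & ~info->cbuf_load;      /* cbuf_clear == 0x5 */
   /* had attachment 0 already been drawn to (cbuf_load bit 0 set),
    * only attachment 2 would be recorded as a full clear */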
@@ -4480,6 +4739,7 @@ tc_destroy(struct pipe_context *_pipe)

   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
      util_queue_fence_destroy(&tc->batch_slots[i].fence);
      util_dynarray_fini(&tc->batch_slots[i].renderpass_infos);
      assert(!tc->batch_slots[i].token);
   }
}
@@ -4595,6 +4855,11 @@ threaded_context_create(struct pipe_context *pipe,
#endif
      tc->batch_slots[i].tc = tc;
      util_queue_fence_init(&tc->batch_slots[i].fence);
      tc->batch_slots[i].renderpass_info_idx = -1;
      if (tc->options.parse_renderpass_info) {
         util_dynarray_init(&tc->batch_slots[i].renderpass_infos, NULL);
         tc_batch_renderpass_infos_resize(&tc->batch_slots[i]);
      }
   }
   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
      util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
@@ -4756,6 +5021,8 @@ threaded_context_create(struct pipe_context *pipe,
   *out = tc;

   tc_begin_next_buffer_list(tc);
   if (tc->options.parse_renderpass_info)
      tc_batch_increment_renderpass_info(tc, false);
   return &tc->base;

fail:
@@ -4773,3 +5040,11 @@ threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned d
      tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
   }
}

const struct tc_renderpass_info *
threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait)
{
   if (tc->renderpass_info && wait)
      util_queue_fence_wait(&tc->renderpass_info->ready);
   return tc->renderpass_info;
}
src/gallium/auxiliary/util/u_threaded_context.h

@@ -204,6 +204,7 @@
#include "util/u_range.h"
#include "util/u_thread.h"
#include "util/slab.h"
#include "util/u_dynarray.h"

#ifdef __cplusplus
extern "C" {
@@ -424,6 +425,63 @@ struct tc_unflushed_batch_token {
   struct threaded_context *tc;
};

struct tc_renderpass_info {
   union {
      struct {
         /* bitmask of full-cleared color buffers */
         uint8_t cbuf_clear;
         /* bitmask of not-full-cleared color buffers */
         uint8_t cbuf_load;
         /* bitmask of color buffers that have their stores invalidated */
         uint8_t cbuf_invalidate;
         /* whether the zsbuf is full-cleared */
         bool zsbuf_clear : 1;
         /* whether the zsbuf is partial-cleared */
         bool zsbuf_clear_partial : 1;
         /* whether the zsbuf is not-full-cleared */
         bool zsbuf_load : 1;
         /* whether the zsbuf is invalidated */
         bool zsbuf_invalidate : 1;
         /* whether a draw occurs */
         bool has_draw : 1;
         uint8_t pad : 3;
         /* 32 bits offset */
         /* bitmask of color buffers using fbfetch */
         uint8_t cbuf_fbfetch;
         /* whether the fragment shader writes to the zsbuf */
         bool zsbuf_write_fs : 1;
         /* whether the DSA state writes to the zsbuf */
         bool zsbuf_write_dsa : 1;
         /* whether the DSA state reads the zsbuf */
         bool zsbuf_read_dsa : 1;
         /* whether the zsbuf is used for fbfetch */
         bool zsbuf_fbfetch : 1;
         uint8_t pad2 : 4;
         uint16_t pad3;
      };
      uint64_t data;
      /* fb info is in data32[0] */
      uint32_t data32[2];
      /* cso info is in data16[2] */
      uint16_t data16[4];
      /* zsbuf fb info is in data8[3] */
      uint8_t data8[8];
   };
   /* determines whether the info can be "safely" read by drivers or if it may still be in use */
   struct util_queue_fence ready;
};

static inline bool
tc_renderpass_info_is_zsbuf_used(const struct tc_renderpass_info *info)
{
   return info->zsbuf_clear ||
          info->zsbuf_clear_partial ||
          info->zsbuf_write_fs ||
          info->zsbuf_write_dsa ||
          info->zsbuf_read_dsa ||
          info->zsbuf_fbfetch;
}

struct tc_batch {
   struct threaded_context *tc;
#if !defined(NDEBUG) && TC_DEBUG >= 1
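The anonymous union lets tc copy or reset whole groups of fields at once: data32[0] aliases the framebuffer-derived bytes (cbuf_clear/cbuf_load/cbuf_invalidate plus the zsbuf flag byte), data16[2] aliases the CSO-derived bytes (cbuf_fbfetch and the shader/DSA zsbuf bits), and data8[3] isolates just the zsbuf framebuffer flags. A sketch of the correspondence, assuming a little-endian target and typical C bitfield packing (which this layout depends on):

   struct tc_renderpass_info info = {0};
   info.cbuf_clear = 0x1;     /* byte 0 -> low byte of data32[0] */
   info.zsbuf_clear = true;   /* byte 3, bit 0 -> data8[3] == 0x01 */
   info.cbuf_fbfetch = 0x2;   /* byte 4 -> low byte of data16[2] */
   assert(info.data32[0] == 0x01000001);
   assert(info.data16[2] == 0x0002);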
@@ -431,6 +489,8 @@ struct tc_batch {
#endif
   uint16_t num_total_slots;
   uint16_t buffer_list_index;
   /* the index of the current renderpass info for recording */
   int renderpass_info_idx;

   /* The last mergeable call that was added to this batch (i.e.
    * buffer subdata). This might be out-of-date or NULL.
@@ -438,8 +498,11 @@ struct tc_batch {
   struct tc_call_base *last_mergeable_call;

   struct util_queue_fence fence;
   /* whether the first set_framebuffer_state call has been seen by this batch */
   bool first_set_fb;
   struct tc_unflushed_batch_token *token;
   uint64_t slots[TC_SLOTS_PER_BATCH];
   struct util_dynarray renderpass_infos;
};

struct tc_buffer_list {
@@ -468,6 +531,13 @@ struct threaded_context_options {

   /* If true, create_fence_fd doesn't access the context in the driver. */
   bool unsynchronized_create_fence_fd;
   /* if true, parse and track renderpass info during execution */
   bool parse_renderpass_info;
   /* callbacks for drivers to read their DSA/FS state and update renderpass info accordingly
    * note: drivers must ONLY append to renderpass info using |=
    */
   void (*dsa_parse)(void *state, struct tc_renderpass_info *info);
   void (*fs_parse)(void *state, struct tc_renderpass_info *info);
};

struct threaded_context {
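A driver opts in by setting parse_renderpass_info and supplying the parse callbacks when creating its threaded context. A sketch under assumed names (my_dsa_state and its fields are hypothetical; only the append-with-|= rule comes from the comment above):

   static void
   my_dsa_parse(void *state, struct tc_renderpass_info *info)
   {
      struct my_dsa_state *dsa = state;
      /* append-only: bits may be set here, never cleared */
      info->zsbuf_write_dsa |= dsa->depth_writes_enabled;
      info->zsbuf_read_dsa |= dsa->depth_test_enabled;
   }

   struct threaded_context_options opts = {
      .parse_renderpass_info = true,
      .dsa_parse = my_dsa_parse,
      .fs_parse = my_fs_parse,   /* analogous fragment-shader hook */
   };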
@@ -511,6 +581,10 @@ struct threaded_context {
   bool seen_tcs;
   bool seen_tes;
   bool seen_gs;
   /* whether the current renderpass has seen a set_framebuffer_state call */
   bool seen_fb_state;
   /* whether a renderpass is currently active */
   bool in_renderpass;

   bool seen_streamout_buffers;
   bool seen_shader_buffers[PIPE_SHADER_TYPES];
@@ -545,13 +619,33 @@ struct threaded_context {

   struct tc_batch batch_slots[TC_MAX_BATCHES];
   struct tc_buffer_list buffer_lists[TC_MAX_BUFFER_LISTS];
   /* the current framebuffer attachments; [PIPE_MAX_COLOR_BUFS] is the zsbuf */
   struct pipe_resource *fb_resources[PIPE_MAX_COLOR_BUFS + 1];
   /* accessed by main thread; preserves info across batches */
   struct tc_renderpass_info *renderpass_info_recording;
   /* accessed by driver thread */
   struct tc_renderpass_info *renderpass_info;
};


void threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage);
void threaded_resource_deinit(struct pipe_resource *res);
struct pipe_context *threaded_context_unwrap_sync(struct pipe_context *pipe);
void tc_driver_internal_flush_notify(struct threaded_context *tc);

/** function for getting the current renderpass info:
 * - renderpass info is always valid
 * - set 'wait=true' when calling during normal execution
 * - set 'wait=false' when calling from flush
 *
 * Rules:
 * 1) this must be called with 'wait=true' after the driver receives a pipe_context::set_framebuffer_state callback
 * 2) this should be called with 'wait=false' when the driver receives a blocking pipe_context::flush call
 * 3) this must not be used during any internal driver operations (e.g., u_blitter)
 */
const struct tc_renderpass_info *
threaded_context_get_renderpass_info(struct threaded_context *tc, bool wait);

struct pipe_context *
threaded_context_create(struct pipe_context *pipe,
                        struct slab_parent_pool *parent_transfer_pool,
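On the driver side, a typical consumer waits on the current info after a set_framebuffer_state callback (rule 1) and derives attachment load ops from the recorded bits. A sketch; the rp_load_op enum and the mapping are hypothetical driver policy:

   static enum rp_load_op
   choose_color_load_op(struct threaded_context *tc, unsigned i)
   {
      const struct tc_renderpass_info *info =
         threaded_context_get_renderpass_info(tc, true /* wait */);

      if (info->cbuf_clear & BITFIELD_BIT(i))
         return RP_LOAD_OP_CLEAR;       /* full clear: no need to load */
      if (info->cbuf_load & BITFIELD_BIT(i))
         return RP_LOAD_OP_LOAD;        /* contents are read by a draw */
      return RP_LOAD_OP_DONT_CARE;      /* neither cleared nor loaded */
   }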