etnaviv: Implement hardware based streamout support

Add support for hardware-accelerated transform feedback using the TFB
command register to control capture state.

Maintains the hardware state through an enum distinguishing between
idle (no hardware state established), active (hardware currently
capturing), and paused (hardware stopped).

Hardware commands are emitted based on state transitions:
 - ENABLE when moving from idle to active
 - RESUME when transitioning from paused to active
 - DISABLE when stopping capture

Transform feedback buffer setup uses the existing dirty state mechanism
through ETNA_DIRTY_STREAMOUT, while command emission uses the new
ETNA_DIRTY_STREAMOUT_CMD flag. Buffer descriptors are computed by
mapping vertex shader transform feedback outputs to fragment shader input
registers, as required by the hardware.

A 64-byte context buffer is allocated per context to maintain hardware
state isolation between applications using transform feedback
simultaneously. The hardware state persists across pause and resume
cycles within a command stream but resets during flushes since transform
feedback state does not survive command buffer boundaries.

The implementation enables the full transform feedback capability with
support for 4 buffers and up to 64 separate or interleaved components,
replacing the previous debug-only stub implementation.

Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37320>
This commit is contained in:
Christian Gmeiner 2025-08-08 12:48:33 +02:00 committed by Marge Bot
parent 52bc3c2d20
commit 0c6c1fa484
5 changed files with 202 additions and 6 deletions

View file

@ -104,6 +104,9 @@ etna_context_destroy(struct pipe_context *pctx)
{
struct etna_context *ctx = etna_context(pctx);
if (VIV_FEATURE(ctx->screen, ETNA_FEATURE_HWTFB))
pipe_resource_reference(&ctx->streamout.context_buffer, NULL);
if (ctx->pending_resources)
_mesa_hash_table_destroy(ctx->pending_resources, NULL);
@ -595,6 +598,14 @@ etna_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
ctx->stats.flushes++;
if (VIV_FEATURE(ctx->screen, ETNA_FEATURE_HWTFB)) {
if (ctx->streamout.xfb_hw_state == ETNA_XFB_HW_ACTIVE)
etna_set_state(ctx->stream, VIVS_TFB_COMMAND, TFB_COMMAND_DISABLE);
ctx->streamout.xfb_hw_state = ETNA_XFB_HW_IDLE;
ctx->streamout.xfb_should_be_active = false;
}
list_for_each_entry(struct etna_acc_query, aq, &ctx->active_acc_queries, node)
etna_acc_query_suspend(aq, ctx);
@ -764,6 +775,14 @@ etna_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
list_inithead(&ctx->active_acc_queries);
if (VIV_FEATURE(ctx->screen, ETNA_FEATURE_HWTFB)) {
ctx->streamout.context_buffer =
pipe_buffer_create(&ctx->screen->base, PIPE_BIND_QUERY_BUFFER, 0, 64);
if (!ctx->streamout.context_buffer)
goto fail;
}
return pctx;
fail:

View file

@ -92,9 +92,28 @@ struct etna_shader_state {
struct etna_shader_variant *vs, *fs;
};
/* State of the hardware transform feedback unit as tracked by the driver. */
enum etna_xfb_hw_state {
   /* No hardware state established. */
   ETNA_XFB_HW_IDLE = 0,
   /* Hardware is currently capturing. */
   ETNA_XFB_HW_ACTIVE = 1,
   /* Hardware capture has been stopped. */
   ETNA_XFB_HW_PAUSED = 2,
};
/* Per-context transform feedback (stream output) state: the bound targets,
 * the tracked hardware capture state, and the TFB_* register values that
 * etna_update_hwxfb() computes and etna_emit_state() writes out.
 */
struct etna_streamout {
/* 64-byte buffer created in etna_context_create() when ETNA_FEATURE_HWTFB
 * is available; emitted as the TFB_CONTEXT_BUFFER relocation. */
struct pipe_resource *context_buffer;
/* Currently bound stream output targets and how many of them are set. */
struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
unsigned num_targets;
/* Desired capture state: set when targets are bound (num_targets > 0),
 * cleared when they are unbound and on flush. */
bool xfb_should_be_active;
/* Capture state last established in hardware via TFB_COMMAND. */
enum etna_xfb_hw_state xfb_hw_state;
/* Per-buffer register values, filled in by etna_update_hwxfb(). */
uint32_t TFB_BUFFER_SIZE[PIPE_MAX_SO_BUFFERS];
uint32_t TFB_BUFFER_STRIDE[PIPE_MAX_SO_BUFFERS];
struct etna_reloc TFB_BUFFER_ADDR[PIPE_MAX_SO_BUFFERS];
/* Number of valid entries in TFB_DESCRIPTOR. */
unsigned num_descriptors;
/* Descriptor counts and descriptor words emitted to the TFB_DESCRIPTOR_COUNT
 * and TFB_DESCRIPTOR registers. */
uint32_t TFB_DESCRIPTOR_COUNT[VIVS_TFB_DESCRIPTOR_COUNT__LEN];
uint32_t TFB_DESCRIPTOR[VIVS_TFB_DESCRIPTOR__LEN];
};
enum etna_uniform_contents {
@ -157,6 +176,7 @@ struct etna_context {
ETNA_DIRTY_SCISSOR_CLIP = (1 << 20),
ETNA_DIRTY_SHADER_CACHES = (1 << 21),
ETNA_DIRTY_STREAMOUT = (1 << 22),
ETNA_DIRTY_STREAMOUT_CMD = (1 << 23)
} dirty;
struct slab_child_pool transfer_pool;

View file

@ -606,9 +606,46 @@ etna_emit_state(struct etna_context *ctx)
}
}
if (unlikely(VIV_FEATURE(screen, ETNA_FEATURE_HWTFB) &&
(dirty & ETNA_DIRTY_RASTERIZER))) {
/*1C000*/ EMIT_STATE(TFB_CONFIG, etna_rasterizer_state(ctx->rasterizer)->TFB_CONFIG);
if (unlikely(VIV_FEATURE(screen, ETNA_FEATURE_HWTFB))) {
if (unlikely(dirty & ETNA_DIRTY_RASTERIZER)) {
/*1C000*/ EMIT_STATE(TFB_CONFIG, etna_rasterizer_state(ctx->rasterizer)->TFB_CONFIG);
}
if (unlikely(dirty & ETNA_DIRTY_STREAMOUT)) {
const struct etna_reloc context_buffer = {
.bo = etna_buffer_resource(ctx->streamout.context_buffer)->bo,
.flags = ETNA_RELOC_READ | ETNA_RELOC_WRITE
};
/*1C008*/ EMIT_STATE_RELOC(TFB_CONTEXT_BUFFER, &context_buffer);
for (int i = 0; i < 4; i++) {
/*1C040*/ EMIT_STATE_RELOC(TFB_BUFFER_ADDR(i), &ctx->streamout.TFB_BUFFER_ADDR[i]);
/*1C080*/ EMIT_STATE(TFB_BUFFER_SIZE(i), ctx->streamout.TFB_BUFFER_SIZE[i]);
/*1C0C0*/ EMIT_STATE(TFB_BUFFER_STRIDE(i), ctx->streamout.TFB_BUFFER_STRIDE[i]);
}
for (int i = 0; i < 4; i++) {
/*1C100*/ EMIT_STATE(TFB_DESCRIPTOR_COUNT(i), ctx->streamout.TFB_DESCRIPTOR_COUNT[i]);
}
for (int i = 0; i < ctx->streamout.num_descriptors; i++) {
/*1C800*/ EMIT_STATE(TFB_DESCRIPTOR(i), ctx->streamout.TFB_DESCRIPTOR[i]);
}
}
if (unlikely(dirty & ETNA_DIRTY_STREAMOUT_CMD)) {
struct etna_streamout *so = &ctx->streamout;
if (so->xfb_should_be_active && so->xfb_hw_state != ETNA_XFB_HW_ACTIVE) {
uint32_t cmd = (so->xfb_hw_state == ETNA_XFB_HW_PAUSED) ?
TFB_COMMAND_RESUME :
TFB_COMMAND_ENABLE;
/*1C004*/ EMIT_STATE(TFB_COMMAND, cmd);
so->xfb_hw_state = ETNA_XFB_HW_ACTIVE;
}
}
}
etna_coalesce_end(stream, &coalesce);

View file

@ -257,9 +257,9 @@ etna_init_screen_caps(struct etna_screen *screen)
caps->texrect = false;
/* Stream output. */
caps->max_stream_output_buffers = DBG_ENABLED(ETNA_DBG_DEQP) ? 4 : 0;
caps->max_stream_output_separate_components = 0;
caps->max_stream_output_interleaved_components = 0;
caps->max_stream_output_buffers = VIV_FEATURE(screen, ETNA_FEATURE_HWTFB) ? 4 : 0;
caps->max_stream_output_separate_components = 64;
caps->max_stream_output_interleaved_components = 64;
caps->max_vertex_attrib_stride = 128;
caps->max_vertex_element_src_offset = 255;

View file

@ -30,6 +30,8 @@
#include "etnaviv_blend.h"
#include "etnaviv_clear_blit.h"
#include "etnaviv_context.h"
#include "etnaviv_compiler.h"
#include "etnaviv_emit.h"
#include "etnaviv_format.h"
#include "etnaviv_rasterizer.h"
#include "etnaviv_screen.h"
@ -37,6 +39,7 @@
#include "etnaviv_translate.h"
#include "etnaviv_util.h"
#include "etnaviv_zsa.h"
#include "nir/nir_xfb_info.h"
#include "util/u_framebuffer.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
@ -769,6 +772,17 @@ etna_set_stream_output_targets(struct pipe_context *pctx,
so->num_targets = num_targets;
if (num_targets > 0) {
so->xfb_should_be_active = true;
ctx->dirty |= ETNA_DIRTY_STREAMOUT_CMD;
} else {
if (so->xfb_hw_state == ETNA_XFB_HW_ACTIVE) {
etna_set_state(ctx->stream, VIVS_TFB_COMMAND, TFB_COMMAND_DISABLE);
so->xfb_hw_state = ETNA_XFB_HW_PAUSED;
}
so->xfb_should_be_active = false;
}
/* There is no need to emit streamout information unless it is active. */
if (so->num_targets > 0)
ctx->dirty |= ETNA_DIRTY_STREAMOUT;
@ -1005,6 +1019,110 @@ etna_record_flush_resources(struct etna_context *ctx)
return true;
}
/* qsort() comparator for nir_xfb_output_info entries: orders by destination
 * buffer first and by offset within the buffer second.
 */
static int
compare_xfb_outputs(const void *a, const void *b)
{
   const nir_xfb_output_info *lhs = a;
   const nir_xfb_output_info *rhs = b;
   const int buffer_delta = lhs->buffer - rhs->buffer;

   /* Only fall back to the offset when both outputs target the same buffer. */
   return buffer_delta ? buffer_delta : lhs->offset - rhs->offset;
}
/* Look up the fragment shader input register that corresponds to a vertex
 * shader transform feedback output.
 *
 * Returns the register number, or -1 if the fragment shader has no input
 * for the output's varying slot.
 */
static signed
find_register_for_components(const struct etna_shader_variant *fs, const nir_xfb_output_info *output)
{
   switch (output->location) {
   case VARYING_SLOT_POS:
      /* pos is hardcoded to register 0 for fs */
      return 0;
   case VARYING_SLOT_PSIZ:
      /* psize is the last register for fs */
      return fs->infile.num_reg + 1;
   default:
      break;
   }

   signed found = -1;

   /* Any other varying: search the fs inputs for a matching slot. */
   for (int idx = 0; idx < fs->infile.num_reg; idx++) {
      if (fs->infile.reg[idx].slot != output->location)
         continue;

      found = fs->infile.reg[idx].reg;
      break;
   }

   return found;
}
static bool
etna_update_hwxfb(struct etna_context *ctx)
{
if (!VIV_FEATURE(ctx->screen, ETNA_FEATURE_HWTFB))
return true;
const struct etna_shader_variant *vs = ctx->shader.vs;
const struct etna_shader_variant *fs = ctx->shader.fs;
struct nir_xfb_info *xfb_info = vs->shader->nir->xfb_info;
if (!xfb_info)
return true;
assert(xfb_info->streams_written == 1);
assert(fs);
for (unsigned i = 0; i < 4; i++)
ctx->streamout.TFB_DESCRIPTOR_COUNT[i] = 0;
if (ctx->streamout.num_targets == 0) {
for (unsigned buffer = 0; buffer < 4; buffer++) {
ctx->streamout.TFB_BUFFER_STRIDE[buffer] = 0;
ctx->streamout.TFB_BUFFER_ADDR[buffer].bo = NULL;
}
return true;
}
u_foreach_bit(buffer, xfb_info->buffers_written) {
const struct pipe_stream_output_target *target = ctx->streamout.targets[buffer];
const nir_xfb_buffer_info *buf_info = &xfb_info->buffers[buffer];
assert(ctx->streamout.targets[buffer]);
ctx->streamout.TFB_BUFFER_SIZE[buffer] = target->buffer_size;
ctx->streamout.TFB_BUFFER_STRIDE[buffer] = buf_info->stride;
ctx->streamout.TFB_BUFFER_ADDR[buffer].bo = etna_buffer_resource(target->buffer)->bo;
ctx->streamout.TFB_BUFFER_ADDR[buffer].offset = target->buffer_offset;
ctx->streamout.TFB_BUFFER_ADDR[buffer].flags = ETNA_RELOC_WRITE;
}
/* We need to sort our xfb outputs based on buffer and offset to ensure
* that we write at the correct offsets.
*/
qsort(xfb_info->outputs, xfb_info->output_count, sizeof(nir_xfb_output_info),
compare_xfb_outputs);
for (unsigned i = 0; i < xfb_info->output_count; i++) {
const nir_xfb_output_info *output = &xfb_info->outputs[i];
assert(output->component_offset < 4);
ctx->streamout.TFB_DESCRIPTOR_COUNT[output->buffer / 128]++;
/* Hardware expects that we provide the fs input register
* numbers for each vs xfb output.
*/
const int32_t reg = find_register_for_components(fs, output);
assert(reg != -1);
ctx->streamout.TFB_DESCRIPTOR[i] =
VIVS_TFB_DESCRIPTOR_OUTPUT_BUFFER(output->buffer) |
VIVS_TFB_DESCRIPTOR_INPUT_REGISTER(reg) |
VIVS_TFB_DESCRIPTOR_COMPONENT_OFFSET(output->component_offset) |
COND(output->component_mask != 0xf, VIVS_TFB_DESCRIPTOR_COMPONENT_MASK(util_bitcount(output->component_mask)));
}
ctx->streamout.num_descriptors = xfb_info->output_count;
return true;
}
struct etna_state_updater {
bool (*update)(struct etna_context *ctx);
uint32_t dirty;
@ -1036,6 +1154,8 @@ static const struct etna_state_updater etna_state_updates[] = {
},
{
etna_record_flush_resources, ETNA_DIRTY_FRAMEBUFFER,
}, {
etna_update_hwxfb, ETNA_DIRTY_STREAMOUT,
}
};