panfrost: Streamline varying linking code

Refactor all the linking code with the following objectives:

* Remove linking magic (especially around XFB)
* Cleaner code (obviously)
* Less stage coupling (in case someone ever implements geom/tess)
* Decouple ATTRIBUTE from ATTRIBUTE_BUFFER to enable optimizations

The main hack remaining is doing precision linking here, to work around
the imprecise linking previously used.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10778>
This commit is contained in:
Alyssa Rosenzweig 2021-05-07 13:24:48 -04:00 committed by Marge Bot
parent 4d44d4179e
commit 2c2cf0ecfe
2 changed files with 223 additions and 290 deletions

View file

@@ -1801,19 +1801,17 @@ panfrost_emit_varyings(struct panfrost_batch *batch,
} }
static unsigned static unsigned
panfrost_streamout_offset(unsigned stride, panfrost_xfb_offset(unsigned stride, struct pipe_stream_output_target *target)
struct pipe_stream_output_target *target)
{ {
return (target->buffer_offset + (pan_so_target(target)->offset * stride * 4)) & 63; return target->buffer_offset + (pan_so_target(target)->offset * stride);
} }
static void static void
panfrost_emit_streamout(struct panfrost_batch *batch, panfrost_emit_streamout(struct panfrost_batch *batch,
struct mali_attribute_buffer_packed *slot, struct mali_attribute_buffer_packed *slot,
unsigned stride_words, unsigned count, unsigned stride, unsigned count,
struct pipe_stream_output_target *target) struct pipe_stream_output_target *target)
{ {
unsigned stride = stride_words * 4;
unsigned max_size = target->buffer_size; unsigned max_size = target->buffer_size;
unsigned expected_size = stride * count; unsigned expected_size = stride * count;
@@ -1829,9 +1827,7 @@ panfrost_emit_streamout(struct panfrost_batch *batch,
PAN_BO_ACCESS_VERTEX_TILER | PAN_BO_ACCESS_VERTEX_TILER |
PAN_BO_ACCESS_FRAGMENT); PAN_BO_ACCESS_FRAGMENT);
/* We will have an offset applied to get alignment */ mali_ptr addr = bo->ptr.gpu + panfrost_xfb_offset(stride, target);
mali_ptr addr = bo->ptr.gpu + target->buffer_offset +
(pan_so_target(target)->offset * stride);
pan_pack(slot, ATTRIBUTE_BUFFER, cfg) { pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
cfg.pointer = (addr & ~63); cfg.pointer = (addr & ~63);
@@ -1854,39 +1850,12 @@ pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
unreachable("Varying not captured"); unreachable("Varying not captured");
} }
static unsigned
pan_varying_size(enum mali_format fmt)
{
unsigned type = MALI_EXTRACT_TYPE(fmt);
unsigned chan = MALI_EXTRACT_CHANNELS(fmt);
unsigned bits = MALI_EXTRACT_BITS(fmt);
unsigned bpc = 0;
if (bits == MALI_CHANNEL_FLOAT) {
/* No doubles */
bool fp16 = (type == MALI_FORMAT_SINT);
assert(fp16 || (type == MALI_FORMAT_UNORM));
bpc = fp16 ? 2 : 4;
} else {
assert(type >= MALI_FORMAT_SNORM && type <= MALI_FORMAT_SINT);
/* See the enums */
bits = 1 << bits;
assert(bits >= 8);
bpc = bits / 8;
}
return bpc * chan;
}
/* Given a varying, figure out which index it corresponds to */ /* Given a varying, figure out which index it corresponds to */
static inline unsigned static inline unsigned
pan_varying_index(unsigned present, enum pan_special_varying v) pan_varying_index(unsigned present, enum pan_special_varying v)
{ {
unsigned mask = (1 << v) - 1; return util_bitcount(present & BITFIELD_MASK(v));
return util_bitcount(present & mask);
} }
/* Get the base offset for XFB buffers, which by convention come after /* Get the base offset for XFB buffers, which by convention come after
@@ -1899,40 +1868,45 @@ pan_xfb_base(unsigned present)
return util_bitcount(present); return util_bitcount(present);
} }
/* Computes the present mask for varyings so we can start emitting varying records */ /* Determines which varying buffers are required */
static inline unsigned static inline unsigned
pan_varying_present(const struct panfrost_device *dev, pan_varying_present(const struct panfrost_device *dev,
struct panfrost_shader_state *vs, struct pan_shader_info *producer,
struct panfrost_shader_state *fs, struct pan_shader_info *consumer,
uint16_t point_coord_mask) uint16_t point_coord_mask)
{ {
/* At the moment we always emit general and position buffers. Not /* At the moment we always emit general and position buffers. Not
* strictly necessary but usually harmless */ * strictly necessary but usually harmless */
unsigned present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION); unsigned present = BITFIELD_BIT(PAN_VARY_GENERAL) | BITFIELD_BIT(PAN_VARY_POSITION);
/* Enable special buffers by the shader info */ /* Enable special buffers by the shader info */
if (vs->info.vs.writes_point_size) if (producer->vs.writes_point_size)
present |= (1 << PAN_VARY_PSIZ); present |= BITFIELD_BIT(PAN_VARY_PSIZ);
if (fs->info.fs.reads_point_coord) /* On Bifrost, special fragment varyings are replaced by LD_VAR_SPECIAL */
present |= (1 << PAN_VARY_PNTCOORD); if (pan_is_bifrost(dev))
return present;
if (fs->info.fs.reads_face) /* On Midgard, these exist as real varyings */
present |= (1 << PAN_VARY_FACE); if (consumer->fs.reads_point_coord)
present |= BITFIELD_BIT(PAN_VARY_PNTCOORD);
if (fs->info.fs.reads_frag_coord && !pan_is_bifrost(dev)) if (consumer->fs.reads_face)
present |= (1 << PAN_VARY_FRAGCOORD); present |= BITFIELD_BIT(PAN_VARY_FACE);
if (consumer->fs.reads_frag_coord)
present |= BITFIELD_BIT(PAN_VARY_FRAGCOORD);
/* Also, if we have a point sprite, we need a point coord buffer */ /* Also, if we have a point sprite, we need a point coord buffer */
for (unsigned i = 0; i < fs->info.varyings.input_count; i++) { for (unsigned i = 0; i < consumer->varyings.input_count; i++) {
gl_varying_slot loc = fs->info.varyings.input[i].location; gl_varying_slot loc = consumer->varyings.input[i].location;
if (util_varying_is_point_coord(loc, point_coord_mask)) if (util_varying_is_point_coord(loc, point_coord_mask))
present |= (1 << PAN_VARY_PNTCOORD); present |= BITFIELD_BIT(PAN_VARY_PNTCOORD);
} }
return present; return present;
@@ -1943,230 +1917,232 @@ pan_varying_present(const struct panfrost_device *dev,
static void static void
pan_emit_vary(const struct panfrost_device *dev, pan_emit_vary(const struct panfrost_device *dev,
struct mali_attribute_packed *out, struct mali_attribute_packed *out,
unsigned present, enum pan_special_varying buf, unsigned buffer_index,
enum mali_format format, unsigned offset) mali_pixel_format format, unsigned offset)
{ {
unsigned nr_channels = MALI_EXTRACT_CHANNELS(format);
unsigned swizzle = dev->quirks & HAS_SWIZZLES ?
panfrost_get_default_swizzle(nr_channels) :
panfrost_bifrost_swizzle(nr_channels);
pan_pack(out, ATTRIBUTE, cfg) { pan_pack(out, ATTRIBUTE, cfg) {
cfg.buffer_index = pan_varying_index(present, buf); cfg.buffer_index = buffer_index;
cfg.offset_enable = !pan_is_bifrost(dev); cfg.offset_enable = !pan_is_bifrost(dev);
cfg.format = (format << 12) | swizzle; cfg.format = format;
cfg.offset = offset; cfg.offset = offset;
} }
} }
/* General varying that is unused */
static void
pan_emit_vary_only(const struct panfrost_device *dev,
struct mali_attribute_packed *out,
unsigned present)
{
pan_emit_vary(dev, out, present, 0, MALI_CONSTANT, 0);
}
/* Special records */ /* Special records */
static const enum mali_format pan_varying_formats[PAN_VARY_MAX] = { static const struct {
[PAN_VARY_POSITION] = MALI_SNAP_4, unsigned components;
[PAN_VARY_PSIZ] = MALI_R16F, enum mali_format format;
[PAN_VARY_PNTCOORD] = MALI_R16F, } pan_varying_formats[PAN_VARY_MAX] = {
[PAN_VARY_FACE] = MALI_R32I, [PAN_VARY_POSITION] = { 4, MALI_SNAP_4 },
[PAN_VARY_FRAGCOORD] = MALI_RGBA32F [PAN_VARY_PSIZ] = { 1, MALI_R16F },
[PAN_VARY_PNTCOORD] = { 1, MALI_R16F },
[PAN_VARY_FACE] = { 1, MALI_R32I },
[PAN_VARY_FRAGCOORD] = { 4, MALI_RGBA32F },
}; };
static mali_pixel_format
pan_special_format(const struct panfrost_device *dev,
enum pan_special_varying buf)
{
assert(buf < PAN_VARY_MAX);
mali_pixel_format format = (pan_varying_formats[buf].format << 12);
if (dev->quirks & HAS_SWIZZLES) {
unsigned nr = pan_varying_formats[buf].components;
format |= panfrost_get_default_swizzle(nr);
}
return format;
}
static void static void
pan_emit_vary_special(const struct panfrost_device *dev, pan_emit_vary_special(const struct panfrost_device *dev,
struct mali_attribute_packed *out, struct mali_attribute_packed *out,
unsigned present, enum pan_special_varying buf) unsigned present, enum pan_special_varying buf)
{ {
assert(buf < PAN_VARY_MAX); pan_emit_vary(dev, out, pan_varying_index(present, buf),
pan_emit_vary(dev, out, present, buf, pan_varying_formats[buf], 0); pan_special_format(dev, buf), 0);
} }
static enum mali_format /* Negative indicates a varying is not found */
pan_xfb_format(enum mali_format format, unsigned nr)
static signed
pan_find_vary(const struct pan_shader_varying *vary,
unsigned vary_count, unsigned loc)
{ {
if (MALI_EXTRACT_BITS(format) == MALI_CHANNEL_FLOAT) for (unsigned i = 0; i < vary_count; ++i) {
return MALI_R32F | MALI_NR_CHANNELS(nr); if (vary[i].location == loc)
else return i;
return MALI_EXTRACT_TYPE(format) | MALI_NR_CHANNELS(nr) | MALI_CHANNEL_32;
}
/* Transform feedback records. Note struct pipe_stream_output is (if packed as
* a bitfield) 32-bit, smaller than a 64-bit pointer, so may as well pass by
* value. */
static void
pan_emit_vary_xfb(const struct panfrost_device *dev,
struct mali_attribute_packed *out,
unsigned present,
unsigned max_xfb,
unsigned *streamout_offsets,
enum mali_format format,
struct pipe_stream_output o)
{
unsigned swizzle = dev->quirks & HAS_SWIZZLES ?
panfrost_get_default_swizzle(o.num_components) :
panfrost_bifrost_swizzle(o.num_components);
pan_pack(out, ATTRIBUTE, cfg) {
/* XFB buffers come after everything else */
cfg.buffer_index = pan_xfb_base(present) + o.output_buffer;
cfg.offset_enable = !pan_is_bifrost(dev);
/* Override number of channels and precision to highp */
cfg.format = (pan_xfb_format(format, o.num_components) << 12) | swizzle;
/* Apply given offsets together */
cfg.offset = (o.dst_offset * 4) /* dwords */
+ streamout_offsets[o.output_buffer];
} }
return -1;
} }
/* Determine if we should capture a varying for XFB. This requires actually /* Assign varying locations for the general buffer. Returns the calculated
* having a buffer for it. If we don't capture it, we'll fallback to a general * per-vertex stride, and outputs offsets into the passed array. Negative
* varying path (linked or unlinked, possibly discarding the write) */ * offset indicates a varying is not used. */
static bool static unsigned
panfrost_xfb_captured(struct panfrost_shader_state *xfb, pan_assign_varyings(const struct panfrost_device *dev,
unsigned loc, unsigned max_xfb) struct pan_shader_info *producer,
struct pan_shader_info *consumer,
signed *offsets)
{ {
if (!(xfb->so_mask & (1ll << loc))) unsigned producer_count = producer->varyings.output_count;
return false; unsigned consumer_count = consumer->varyings.input_count;
struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc); const struct pan_shader_varying *producer_vars = producer->varyings.output;
return o->output_buffer < max_xfb; const struct pan_shader_varying *consumer_vars = consumer->varyings.input;
}
static void unsigned stride = 0;
pan_emit_general_varying(const struct panfrost_device *dev,
struct mali_attribute_packed *out,
struct panfrost_shader_state *other,
struct panfrost_shader_state *xfb,
gl_varying_slot loc,
enum mali_format format,
unsigned present,
unsigned *gen_offsets,
enum mali_format *gen_formats,
unsigned *gen_stride,
unsigned idx,
bool should_alloc)
{
/* Check if we're linked */
unsigned other_varying_count =
other->info.stage == MESA_SHADER_FRAGMENT ?
other->info.varyings.input_count :
other->info.varyings.output_count;
const struct pan_shader_varying *other_varyings =
other->info.stage == MESA_SHADER_FRAGMENT ?
other->info.varyings.input :
other->info.varyings.output;
signed other_idx = -1;
for (unsigned j = 0; j < other_varying_count; ++j) { for (unsigned i = 0; i < producer_count; ++i) {
if (other_varyings[j].location == loc) { signed loc = pan_find_vary(consumer_vars, consumer_count,
other_idx = j; producer_vars[i].location);
break;
if (loc >= 0) {
offsets[i] = stride;
enum pipe_format format = producer_vars[i].format;
stride += util_format_get_blocksize(format);
} else {
offsets[i] = -1;
} }
} }
if (other_idx < 0) { return stride;
pan_emit_vary_only(dev, out, present);
return;
}
unsigned offset = gen_offsets[other_idx];
if (should_alloc) {
/* We're linked, so allocate a space via a watermark allocation */
enum mali_format alt =
dev->formats[other_varyings[other_idx].format].hw >> 12;
/* Do interpolation at minimum precision */
unsigned size_main = pan_varying_size(format);
unsigned size_alt = pan_varying_size(alt);
unsigned size = MIN2(size_main, size_alt);
/* If a varying is marked for XFB but not actually captured, we
* should match the format to the format that would otherwise
* be used for XFB, since dEQP checks for invariance here. It's
* unclear if this is required by the spec. */
if (xfb->so_mask & (1ull << loc)) {
struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
format = pan_xfb_format(format, o->num_components);
size = pan_varying_size(format);
} else if (size == size_alt) {
format = alt;
}
gen_offsets[idx] = *gen_stride;
gen_formats[other_idx] = format;
offset = *gen_stride;
*gen_stride += size;
}
pan_emit_vary(dev, out, present, PAN_VARY_GENERAL, format, offset);
} }
/* Higher-level wrapper around all of the above, classifying a varying into one /* Emitter for a single varying (attribute) descriptor */
* of the above types */
static void static void
panfrost_emit_varying(const struct panfrost_device *dev, panfrost_emit_varying(const struct panfrost_device *dev,
struct mali_attribute_packed *out, struct mali_attribute_packed *out,
struct panfrost_shader_state *stage, const struct pan_shader_varying varying,
struct panfrost_shader_state *other, enum pipe_format pipe_format,
struct panfrost_shader_state *xfb,
unsigned present, unsigned present,
uint16_t point_sprite_mask, uint16_t point_sprite_mask,
struct pipe_stream_output_info *xfb,
uint64_t xfb_loc_mask,
unsigned max_xfb, unsigned max_xfb,
unsigned *streamout_offsets, unsigned *xfb_offsets,
unsigned *gen_offsets, signed offset,
enum mali_format *gen_formats, enum pan_special_varying pos_varying)
unsigned *gen_stride,
unsigned idx,
bool should_alloc,
bool is_fragment)
{ {
gl_varying_slot loc = /* Note: varying.format != pipe_format in some obscure cases due to a
stage->info.stage == MESA_SHADER_FRAGMENT ? * limitation of the NIR linker. This should be fixed in the future to
stage->info.varyings.input[idx].location : * eliminate the additional lookups. See:
stage->info.varyings.output[idx].location; * dEQP-GLES3.functional.shaders.conditionals.if.sequence_statements_vertex
enum mali_format format = */
stage->info.stage == MESA_SHADER_FRAGMENT ? gl_varying_slot loc = varying.location;
dev->formats[stage->info.varyings.input[idx].format].hw >> 12 : mali_pixel_format format = dev->formats[pipe_format].hw;
dev->formats[stage->info.varyings.output[idx].format].hw >> 12;
/* Override format to match linkage */ struct pipe_stream_output *o = (xfb_loc_mask & BITFIELD64_BIT(loc)) ?
if (!should_alloc && gen_formats[idx]) pan_get_so(xfb, loc) : NULL;
format = gen_formats[idx];
if (util_varying_is_point_coord(loc, point_sprite_mask)) { if (util_varying_is_point_coord(loc, point_sprite_mask)) {
pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD); pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD);
} else if (panfrost_xfb_captured(xfb, loc, max_xfb)) { } else if (o && o->output_buffer < max_xfb) {
struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc); unsigned fixup_offset = xfb_offsets[o->output_buffer] & 63;
pan_emit_vary_xfb(dev, out, present, max_xfb, streamout_offsets, format, *o);
pan_emit_vary(dev, out,
pan_xfb_base(present) + o->output_buffer,
format, (o->dst_offset * 4) + fixup_offset);
} else if (loc == VARYING_SLOT_POS) { } else if (loc == VARYING_SLOT_POS) {
if (is_fragment) pan_emit_vary_special(dev, out, present, pos_varying);
pan_emit_vary_special(dev, out, present, PAN_VARY_FRAGCOORD);
else
pan_emit_vary_special(dev, out, present, PAN_VARY_POSITION);
} else if (loc == VARYING_SLOT_PSIZ) { } else if (loc == VARYING_SLOT_PSIZ) {
pan_emit_vary_special(dev, out, present, PAN_VARY_PSIZ); pan_emit_vary_special(dev, out, present, PAN_VARY_PSIZ);
} else if (loc == VARYING_SLOT_PNTC) {
pan_emit_vary_special(dev, out, present, PAN_VARY_PNTCOORD);
} else if (loc == VARYING_SLOT_FACE) { } else if (loc == VARYING_SLOT_FACE) {
pan_emit_vary_special(dev, out, present, PAN_VARY_FACE); pan_emit_vary_special(dev, out, present, PAN_VARY_FACE);
} else if (offset < 0) {
pan_emit_vary(dev, out, 0, (MALI_CONSTANT << 12), 0);
} else { } else {
pan_emit_general_varying(dev, out, other, xfb, loc, format, present, STATIC_ASSERT(PAN_VARY_GENERAL == 0);
gen_offsets, gen_formats, gen_stride, pan_emit_vary(dev, out, 0, format, offset);
idx, should_alloc); }
}
/* Links varyings and uploads ATTRIBUTE descriptors. Can execute at link time,
* rather than draw time (under good conditions). */
struct pan_linkage {
/* Uploaded attribute descriptors */
mali_ptr producer, consumer;
/* Varyings buffers required */
uint32_t present;
/* Per-vertex stride for general varying buffer */
uint32_t stride;
};
static void
panfrost_emit_varying_descs(
struct pan_pool *pool,
struct panfrost_shader_state *producer,
struct panfrost_shader_state *consumer,
struct panfrost_streamout *xfb,
uint16_t point_coord_mask,
struct pan_linkage *out)
{
struct panfrost_device *dev = pool->dev;
struct pipe_stream_output_info *xfb_info = &producer->stream_output;
unsigned producer_count = producer->info.varyings.output_count;
unsigned consumer_count = consumer->info.varyings.input_count;
/* Offsets within the general varying buffer, indexed by location */
signed offsets[PIPE_MAX_ATTRIBS];
assert(producer_count < ARRAY_SIZE(offsets));
assert(consumer_count < ARRAY_SIZE(offsets));
/* Allocate enough descriptors for both shader stages */
struct panfrost_ptr T = panfrost_pool_alloc_desc_array(pool,
producer_count + consumer_count, ATTRIBUTE);
struct mali_attribute_packed *descs = T.cpu;
out->producer = producer_count ? T.gpu : 0;
out->consumer = consumer_count ? T.gpu +
(MALI_ATTRIBUTE_LENGTH * producer_count) : 0;
/* Lay out the varyings. Must use producer to lay out, in order to
* respect transform feedback precisions. */
out->present = pan_varying_present(dev, &producer->info,
&consumer->info, point_coord_mask);
out->stride = pan_assign_varyings(dev, &producer->info,
&consumer->info, offsets);
unsigned xfb_offsets[PIPE_MAX_SO_BUFFERS];
for (unsigned i = 0; i < xfb->num_targets; ++i) {
xfb_offsets[i] = panfrost_xfb_offset(xfb_info->stride[i] * 4,
xfb->targets[i]);
}
for (unsigned i = 0; i < producer_count; ++i) {
panfrost_emit_varying(dev, descs + i,
producer->info.varyings.output[i],
producer->info.varyings.output[i].format,
out->present, 0, &producer->stream_output,
producer->so_mask, xfb->num_targets,
xfb_offsets, offsets[i], PAN_VARY_POSITION);
}
for (unsigned i = 0; i < consumer_count; ++i) {
signed j = pan_find_vary(producer->info.varyings.output,
producer->info.varyings.output_count,
consumer->info.varyings.input[i].location);
signed offset = (j >= 0) ? offsets[j] : -1;
panfrost_emit_varying(dev, descs + producer_count + i,
consumer->info.varyings.input[i],
producer->info.varyings.output[j].format,
out->present, point_coord_mask,
&producer->stream_output, producer->so_mask,
xfb->num_targets, xfb_offsets, offset,
PAN_VARY_FRAGCOORD);
} }
} }
@@ -2176,7 +2152,7 @@ pan_emit_special_input(struct mali_attribute_buffer_packed *out,
enum pan_special_varying v, enum pan_special_varying v,
unsigned special) unsigned special)
{ {
if (present & (1 << v)) { if (present & BITFIELD_BIT(v)) {
unsigned idx = pan_varying_index(present, v); unsigned idx = pan_varying_index(present, v);
pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) { pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) {
@@ -2201,69 +2177,22 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
struct panfrost_context *ctx = batch->ctx; struct panfrost_context *ctx = batch->ctx;
struct panfrost_device *dev = pan_device(ctx->base.screen); struct panfrost_device *dev = pan_device(ctx->base.screen);
struct panfrost_shader_state *vs, *fs; struct panfrost_shader_state *vs, *fs;
size_t vs_size; struct pan_linkage linkage;
/* Allocate the varying descriptor */
vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX); vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
vs_size = MALI_ATTRIBUTE_LENGTH * vs->info.varyings.output_count;
struct panfrost_ptr trans =
panfrost_pool_alloc_desc_array(&batch->pool,
vs->info.varyings.output_count +
fs->info.varyings.input_count,
ATTRIBUTE);
struct pipe_stream_output_info *so = &vs->stream_output;
uint16_t point_coord_mask = ctx->rasterizer->base.sprite_coord_enable; uint16_t point_coord_mask = ctx->rasterizer->base.sprite_coord_enable;
/* TODO: point sprites need lowering on Bifrost */ /* TODO: point sprites need lowering on Bifrost */
if (!point_coord_replace || pan_is_bifrost(dev)) if (!point_coord_replace || pan_is_bifrost(dev))
point_coord_mask = 0; point_coord_mask = 0;
unsigned present = pan_varying_present(dev, vs, fs, point_coord_mask); /* Emit ATTRIBUTE descriptors */
panfrost_emit_varying_descs(&batch->pool, vs, fs, &ctx->streamout, point_coord_mask, &linkage);
/* Check if this varying is linked by us. This is the case for struct pipe_stream_output_info *so = &vs->stream_output;
* general-purpose, non-captured varyings. If it is, link it. If it's unsigned xfb_base = pan_xfb_base(linkage.present);
* not, use the provided stream out information to determine the
* offset, since it was already linked for us. */
unsigned gen_offsets[32];
enum mali_format gen_formats[32];
memset(gen_offsets, 0, sizeof(gen_offsets));
memset(gen_formats, 0, sizeof(gen_formats));
unsigned gen_stride = 0;
assert(vs->info.varyings.output_count < ARRAY_SIZE(gen_offsets));
assert(fs->info.varyings.input_count < ARRAY_SIZE(gen_offsets));
unsigned streamout_offsets[32];
for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
streamout_offsets[i] = panfrost_streamout_offset(
so->stride[i],
ctx->streamout.targets[i]);
}
struct mali_attribute_packed *ovs = (struct mali_attribute_packed *)trans.cpu;
struct mali_attribute_packed *ofs = ovs + vs->info.varyings.output_count;
for (unsigned i = 0; i < vs->info.varyings.output_count; i++) {
panfrost_emit_varying(dev, ovs + i, vs, fs, vs, present, 0,
ctx->streamout.num_targets, streamout_offsets,
gen_offsets, gen_formats, &gen_stride, i,
true, false);
}
for (unsigned i = 0; i < fs->info.varyings.input_count; i++) {
panfrost_emit_varying(dev, ofs + i, fs, vs, vs, present, point_coord_mask,
ctx->streamout.num_targets, streamout_offsets,
gen_offsets, gen_formats, &gen_stride, i,
false, true);
}
unsigned xfb_base = pan_xfb_base(present);
struct panfrost_ptr T = struct panfrost_ptr T =
panfrost_pool_alloc_desc_array(&batch->pool, panfrost_pool_alloc_desc_array(&batch->pool,
xfb_base + xfb_base +
@@ -2285,33 +2214,36 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) { for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
panfrost_emit_streamout(batch, &varyings[xfb_base + i], panfrost_emit_streamout(batch, &varyings[xfb_base + i],
so->stride[i], so->stride[i] * 4,
out_count, out_count,
ctx->streamout.targets[i]); ctx->streamout.targets[i]);
} }
panfrost_emit_varyings(batch, panfrost_emit_varyings(batch,
&varyings[pan_varying_index(present, PAN_VARY_GENERAL)], &varyings[pan_varying_index(linkage.present, PAN_VARY_GENERAL)],
gen_stride, vertex_count); linkage.stride, vertex_count);
/* fp32 vec4 gl_Position */ /* fp32 vec4 gl_Position */
*position = panfrost_emit_varyings(batch, *position = panfrost_emit_varyings(batch,
&varyings[pan_varying_index(present, PAN_VARY_POSITION)], &varyings[pan_varying_index(linkage.present, PAN_VARY_POSITION)],
sizeof(float) * 4, vertex_count); sizeof(float) * 4, vertex_count);
if (present & (1 << PAN_VARY_PSIZ)) { if (linkage.present & BITFIELD_BIT(PAN_VARY_PSIZ)) {
*psiz = panfrost_emit_varyings(batch, *psiz = panfrost_emit_varyings(batch,
&varyings[pan_varying_index(present, PAN_VARY_PSIZ)], &varyings[pan_varying_index(linkage.present, PAN_VARY_PSIZ)],
2, vertex_count); 2, vertex_count);
} }
pan_emit_special_input(varyings, present, PAN_VARY_PNTCOORD, MALI_ATTRIBUTE_SPECIAL_POINT_COORD); pan_emit_special_input(varyings, linkage.present,
pan_emit_special_input(varyings, present, PAN_VARY_FACE, MALI_ATTRIBUTE_SPECIAL_FRONT_FACING); PAN_VARY_PNTCOORD, MALI_ATTRIBUTE_SPECIAL_POINT_COORD);
pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_ATTRIBUTE_SPECIAL_FRAG_COORD); pan_emit_special_input(varyings, linkage.present, PAN_VARY_FACE,
MALI_ATTRIBUTE_SPECIAL_FRONT_FACING);
pan_emit_special_input(varyings, linkage.present, PAN_VARY_FRAGCOORD,
MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
*buffers = T.gpu; *buffers = T.gpu;
*vs_attribs = vs->info.varyings.output_count ? trans.gpu : 0; *vs_attribs = linkage.producer;
*fs_attribs = fs->info.varyings.input_count ? trans.gpu + vs_size : 0; *fs_attribs = linkage.consumer;
} }
void void

View file

@@ -114,7 +114,8 @@ collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
*/ */
if (type == nir_type_float && if (type == nir_type_float &&
(var->data.precision == GLSL_PRECISION_MEDIUM || (var->data.precision == GLSL_PRECISION_MEDIUM ||
var->data.precision == GLSL_PRECISION_LOW)) { var->data.precision == GLSL_PRECISION_LOW) &&
!s->info.has_transform_feedback_varyings) {
type |= 16; type |= 16;
} else { } else {
type |= 32; type |= 32;