asahi: implement VBO robustness
GL semantics. GLES (weaker) and VK (stronger) semantics are left as a TODO,
with explanations given. Robustness is always enabled to deal with null VBOs;
this should be optimized once we have soft fault. This necessitates a rework
of VBO keys, but hopefully for the best.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27616>
parent 4aadf67523
commit 5dc0f5ccba

6 changed files with 152 additions and 28 deletions
@@ -171,7 +171,35 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
       el = nir_load_vertex_id(b);
    }
 
-   nir_def *base = nir_load_vbo_base_agx(b, nir_imm_int(b, attrib.buf));
+   /* VBO bases are per-attribute, otherwise they're per-buffer. This allows
+    * memory sinks to work properly with robustness, allows folding
+    * the src_offset into the VBO base to save an add in the shader, and reduces
+    * the size of the vertex fetch key. That last piece allows reusing a linked
+    * VS with both separate and interleaved attributes.
+    */
+   nir_def *buf_handle = nir_imm_int(b, index);
+
+   /* Robustness is handled at the ID level */
+   nir_def *bounds = nir_load_attrib_clamp_agx(b, buf_handle);
+
+   /* For now, robustness is always applied. This gives GL robustness semantics.
+    * For robustBufferAccess2, we'll want to check for out-of-bounds access
+    * (where el > bounds), and replace base with the address of a zero sink.
+    * With soft fault and a large enough sink, we don't need to clamp the index,
+    * allowing that robustness behaviour to be implemented in 2 cmpsel
+    * before the load. That is faster than the 4 cmpsel required after the load,
+    * and it avoids waiting on the load which should help prolog performance.
+    *
+    * TODO: Plumb through soft fault information to skip this.
+    *
+    * TODO: Add a knob for robustBufferAccess2 semantics.
+    */
+   bool robust = true;
+   if (robust) {
+      el = nir_umin(b, el, bounds);
+   }
+
+   nir_def *base = nir_load_vbo_base_agx(b, buf_handle);
 
    assert((stride % interchange_align) == 0 && "must be aligned");
    assert((offset % interchange_align) == 0 && "must be aligned");

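The net effect of this lowering is that the element index is clamped before
the address is formed. The following is a minimal standalone sketch of that
addressing math in plain C, not the NIR pass itself; the struct and function
names here are illustrative only:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Per-attribute state as uploaded by the driver: base is the VBO base plus
 * src_offset (or a zero sink), clamp is the largest valid vertex/divided
 * instance index, stride is the per-index step in bytes.
 */
struct attrib_state {
   uint64_t base;
   uint32_t clamp;
   uint32_t stride;
};

/* Model of the robust fetch: clamp the element index before forming the
 * address, so out-of-bounds reads hit the last valid element instead of
 * faulting (GL robustness semantics).
 */
static uint64_t
fetch_address(const struct attrib_state *a, uint32_t el)
{
   if (el > a->clamp)
      el = a->clamp; /* el = umin(el, bounds) in the NIR above */

   return a->base + (uint64_t)el * a->stride;
}

int
main(void)
{
   struct attrib_state a = {.base = 0x10000, .clamp = 99, .stride = 16};

   assert(fetch_address(&a, 5) == 0x10000 + 5 * 16);
   assert(fetch_address(&a, 1000) == fetch_address(&a, 99)); /* clamped */
   printf("ok\n");
   return 0;
}
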
@@ -1839,10 +1839,16 @@ store("agx", [1, 1], [ACCESS, BASE, FORMAT, SIGN_EXTEND])
 # Logical complement of load_front_face, mapping to an AGX system value
 system_value("back_face_agx", 1, bit_sizes=[1, 32])
 
-# Load the base address of an indexed VBO (for lowering VBOs)
+# Load the base address of an indexed vertex attribute (for lowering).
 intrinsic("load_vbo_base_agx", src_comp=[1], dest_comp=1, bit_sizes=[64],
           flags=[CAN_ELIMINATE, CAN_REORDER])
 
+# When vertex robustness is enabled, loads the maximum valid attribute index for
+# a given attribute. This is unsigned: the driver ensures that at least one
+# vertex is always valid to load, directing loads to a zero sink if necessary.
+intrinsic("load_attrib_clamp_agx", src_comp=[1], dest_comp=1,
+          bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER])
+
 # Load a driver-internal system value from a given system value set at a given
 # binding within the set. This is used for correctness when lowering things like
 # UBOs with merged shaders.

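Stated as an invariant, the contract above says: every index in [0, clamp]
must read entirely inside the bound range, and index 0 must always be
readable (via a zero sink if necessary). A hedged sketch of that check, with
hypothetical names, since the real enforcement is split between the lowering
pass and the driver's uniform upload:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical check of the load_attrib_clamp_agx contract. Relative to
 * attrib_base (which already folds in src_offset), index i reads bytes
 * [i * stride_B, i * stride_B + elsize_B), so a clamp is valid iff the last
 * permitted index still fits; clamp = 0 with a zero sink base handles the
 * "no valid vertices" case.
 */
static bool
clamp_contract_holds(uint64_t bound_size_B, uint32_t stride_B,
                     uint32_t elsize_B, uint32_t clamp)
{
   uint64_t last_end = (uint64_t)clamp * stride_B + elsize_B;
   return last_end <= bound_size_B; /* implies index 0 also fits */
}
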
@@ -137,8 +137,11 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
       return load_sysval_indirect(b, 1, 16, stage_table(b), &s->sampler_handle,
                                   intr->src[0].ssa);
    case nir_intrinsic_load_vbo_base_agx:
-      return load_sysval_indirect(b, 1, 64, AGX_SYSVAL_TABLE_ROOT, &u->vbo_base,
-                                  intr->src[0].ssa);
+      return load_sysval_indirect(b, 1, 64, AGX_SYSVAL_TABLE_ROOT,
+                                  &u->attrib_base, intr->src[0].ssa);
+   case nir_intrinsic_load_attrib_clamp_agx:
+      return load_sysval_indirect(b, 1, 32, AGX_SYSVAL_TABLE_ROOT,
+                                  &u->attrib_clamp, intr->src[0].ssa);
    case nir_intrinsic_load_blend_const_color_r_float:
       return load_sysval_root(b, 1, 32, &u->blend_constant[0]);
    case nir_intrinsic_load_blend_const_color_g_float:

@@ -53,6 +53,7 @@
 #include "agx_device.h"
 #include "agx_disk_cache.h"
 #include "agx_nir_lower_gs.h"
+#include "agx_nir_lower_vbo.h"
 #include "agx_tilebuffer.h"
 #include "nir_builder.h"
 #include "nir_builder_opcodes.h"

@@ -1461,7 +1462,8 @@ agx_create_vertex_elements(struct pipe_context *ctx, unsigned count,
 {
    assert(count <= AGX_MAX_ATTRIBS);
 
-   struct agx_attribute *attribs = calloc(sizeof(*attribs), AGX_MAX_ATTRIBS);
+   struct agx_vertex_elements *so = calloc(1, sizeof(*so));
 
    for (unsigned i = 0; i < count; ++i) {
       const struct pipe_vertex_element ve = state[i];

@@ -1470,16 +1472,17 @@ agx_create_vertex_elements(struct pipe_context *ctx, unsigned count,
       unsigned chan_size = desc->channel[0].size / 8;
       assert((ve.src_offset & (chan_size - 1)) == 0);
 
-      attribs[i] = (struct agx_attribute){
-         .buf = ve.vertex_buffer_index,
-         .src_offset = ve.src_offset,
+      so->buffers[i] = ve.vertex_buffer_index;
+      so->src_offsets[i] = ve.src_offset;
+
+      so->key[i] = (struct agx_velem_key){
          .stride = ve.src_stride,
          .format = ve.src_format,
         .divisor = ve.instance_divisor,
      };
   }
 
-   return attribs;
+   return so;
 }
 
 static void

@@ -1836,6 +1839,22 @@ agx_nir_lower_poly_stipple(nir_shader *s)
    return true;
 }
 
+static bool
+lower_vbo(nir_shader *s, struct agx_velem_key *key)
+{
+   struct agx_attribute out[AGX_MAX_VBUFS];
+
+   for (unsigned i = 0; i < AGX_MAX_VBUFS; ++i) {
+      out[i] = (struct agx_attribute){
+         .divisor = key[i].divisor,
+         .stride = key[i].stride,
+         .format = key[i].format,
+      };
+   }
+
+   return agx_nir_lower_vbo(s, out);
+}
+
 /* Does not take ownership of key. Clones if necessary. */
 static struct agx_compiled_shader *
 agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,

@@ -1864,7 +1883,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
    if (nir->info.stage == MESA_SHADER_VERTEX) {
       struct asahi_vs_shader_key *key = &key_->vs;
 
-      NIR_PASS(_, nir, agx_nir_lower_vbo, key->attribs);
+      NIR_PASS(_, nir, lower_vbo, key->attribs);
       NIR_PASS(_, nir, agx_nir_lower_point_size, key->fixed_point_size);
 
       if (should_lower_clip_m1_1(dev, key->clip_halfz)) {

@@ -1881,7 +1900,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
       nir_shader *vs = nir_deserialize(NULL, &agx_nir_options, &vs_reader);
 
       /* Apply the VS key to the VS before linking it in */
-      NIR_PASS_V(vs, agx_nir_lower_vbo, key->attribs);
+      NIR_PASS_V(vs, lower_vbo, key->attribs);
       NIR_PASS_V(vs, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
       NIR_PASS_V(vs, agx_nir_lower_sysvals, false);
 

@@ -1903,7 +1922,7 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
       nir_shader *vs = nir_deserialize(NULL, &agx_nir_options, &vs_reader);
 
       /* Apply the VS key to the VS before linking it in */
-      NIR_PASS(_, vs, agx_nir_lower_vbo, key->attribs);
+      NIR_PASS(_, vs, lower_vbo, key->attribs);
       NIR_PASS(_, vs, agx_nir_lower_ia, &key->ia);
 
       NIR_PASS(_, vs, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);

@@ -2245,8 +2264,7 @@ agx_create_shader_state(struct pipe_context *pctx,
    switch (so->type) {
    case PIPE_SHADER_VERTEX: {
       for (unsigned i = 0; i < AGX_MAX_VBUFS; ++i) {
-         key.vs.attribs[i] = (struct agx_attribute){
-            .buf = i,
+         key.vs.attribs[i] = (struct agx_velem_key){
             .stride = 16,
             .format = PIPE_FORMAT_R32G32B32A32_FLOAT,
          };

@@ -2409,8 +2427,7 @@ agx_update_vs(struct agx_context *ctx)
       ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded,
    };
 
-   memcpy(key.attribs, ctx->attributes,
-          sizeof(key.attribs[0]) * AGX_MAX_ATTRIBS);
+   memcpy(key.attribs, &ctx->attributes->key, sizeof(key.attribs));
 
    return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX,
                             (union asahi_shader_key *)&key);

@@ -2441,8 +2458,7 @@ agx_update_tcs(struct agx_context *ctx, const struct pipe_draw_info *info)
       .index_size_B = info->index_size,
    };
 
-   memcpy(key.attribs, ctx->attributes,
-          sizeof(key.attribs[0]) * AGX_MAX_ATTRIBS);
+   memcpy(key.attribs, &ctx->attributes->key, sizeof(key.attribs));
 
    static_assert(sizeof(key.input_nir_sha1) ==
                     sizeof(ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1),

@@ -2491,8 +2507,7 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info,
       .rasterizer_discard = ctx->rast->base.rasterizer_discard,
    };
 
-   memcpy(key.attribs, ctx->attributes,
-          sizeof(key.attribs[0]) * AGX_MAX_ATTRIBS);
+   memcpy(key.attribs, &ctx->attributes->key, sizeof(key.attribs));
 
    static_assert(sizeof(key.input_nir_sha1) ==
                     sizeof(ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1),

@@ -100,8 +100,13 @@ struct PACKED agx_draw_uniforms {
    /* Pointers to the system value tables themselves (for indirection) */
    uint64_t tables[AGX_NUM_SYSVAL_TABLES];
 
-   /* Vertex buffer object bases, if present */
-   uint64_t vbo_base[PIPE_MAX_ATTRIBS];
+   /* Vertex buffer object bases, if present. If vertex robustness is disabled,
+    * attrib_base maps VBOs directly and attrib_clamp is undefined. If vertex
+    * robustness is enabled, attrib_base maps attributes and attrib_clamp is an
+    * inclusive clamp on vertex/divided instance indices.
+    */
+   uint64_t attrib_base[PIPE_MAX_ATTRIBS];
+   uint32_t attrib_clamp[PIPE_MAX_ATTRIBS];
 
    /* Address of input assembly buffer if geom/tess is used, else 0 */
    uint64_t input_assembly;

@@ -400,14 +405,31 @@ struct agx_blend {
    uint32_t store;
 };
 
+/* These parts of the vertex element affect the generated code */
+struct agx_velem_key {
+   uint32_t divisor;
+   uint16_t stride;
+   uint8_t format;
+   uint8_t pad;
+};
+
 struct asahi_vs_shader_key {
-   struct agx_attribute attribs[AGX_MAX_VBUFS];
+   struct agx_velem_key attribs[AGX_MAX_VBUFS];
    bool clip_halfz;
    bool fixed_point_size;
    uint64_t outputs_flat_shaded;
    uint64_t outputs_linear_shaded;
 };
 
+struct agx_vertex_elements {
+   unsigned num_attribs;
+   struct agx_velem_key key[PIPE_MAX_ATTRIBS];
+
+   /* These parts do not affect the generated code so are not in the key */
+   uint16_t src_offsets[PIPE_MAX_ATTRIBS];
+   uint16_t buffers[PIPE_MAX_ATTRIBS];
+};
+
 struct asahi_fs_shader_key {
    struct agx_blend_key blend;

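A note on the key split above, with a minimal sketch (names are illustrative,
not driver API): because agx_velem_key packs only code-affecting fields with
no implicit padding, variant lookup can compare or hash the key array
bytewise, and two vertex-element states differing only in buffers and
src_offsets share one compiled VS:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Mirror of agx_velem_key: the explicit pad byte leaves no uninitialized
 * holes, so bytewise comparison (or hashing) of key arrays is well defined.
 */
struct velem_key {
   uint32_t divisor;
   uint16_t stride;
   uint8_t format;
   uint8_t pad;
};

/* Hypothetical variant-cache predicate: buffers and src_offsets are
 * deliberately absent from the key, so changing them never forces a
 * recompile.
 */
static bool
same_vs_variant(const struct velem_key *a, const struct velem_key *b,
                unsigned count)
{
   return memcmp(a, b, sizeof(*a) * count) == 0;
}
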
@@ -429,7 +451,7 @@ struct asahi_tcs_shader_key {
    uint8_t index_size_B;
 
    /* Vertex shader key */
-   struct agx_attribute attribs[AGX_MAX_VBUFS];
+   struct agx_velem_key attribs[AGX_MAX_VBUFS];
 
    /* Tessellation control shaders must be linked with a vertex shader. */
    uint8_t input_nir_sha1[20];

@@ -440,7 +462,7 @@ struct asahi_gs_shader_key {
    struct agx_ia_key ia;
 
    /* Vertex shader key */
-   struct agx_attribute attribs[AGX_MAX_VBUFS];
+   struct agx_velem_key attribs[AGX_MAX_VBUFS];
 
    /* If true, this GS is run only for its side effects (including XFB) */
    bool rasterizer_discard;

@@ -561,7 +583,7 @@ struct agx_context {
    float default_inner_level[2];
 
    struct agx_stage stage[PIPE_SHADER_TYPES];
-   struct agx_attribute *attributes;
+   struct agx_vertex_elements *attributes;
    struct agx_rasterizer *rast;
    struct agx_zsa *zs;
    struct agx_blend *blend;

@@ -4,7 +4,9 @@
  */
 #include <stdio.h>
 #include "asahi/lib/agx_pack.h"
+#include "util/format/u_format.h"
 #include "agx_state.h"
 #include "pool.h"
 
 static uint64_t
 agx_const_buffer_ptr(struct agx_batch *batch, struct pipe_constant_buffer *cb)

@@ -38,7 +40,13 @@ void
 agx_upload_vbos(struct agx_batch *batch)
 {
    struct agx_context *ctx = batch->ctx;
+   struct agx_vertex_elements *attribs = ctx->attributes;
+   uint64_t buffers[PIPE_MAX_ATTRIBS] = {0};
+   size_t buf_sizes[PIPE_MAX_ATTRIBS] = {0};
 
+   /* TODO: To handle null vertex buffers, we use robustness always. Once we
+    * support soft fault in the kernel, we can optimize this.
+    */
    u_foreach_bit(vbo, ctx->vb_mask) {
       struct pipe_vertex_buffer vb = ctx->vertex_buffers[vbo];
       assert(!vb.is_user_buffer);

@@ -47,9 +55,51 @@ agx_upload_vbos(struct agx_batch *batch)
          struct agx_resource *rsrc = agx_resource(vb.buffer.resource);
          agx_batch_reads(batch, rsrc);
 
-         batch->uniforms.vbo_base[vbo] = rsrc->bo->ptr.gpu + vb.buffer_offset;
-      } else {
-         batch->uniforms.vbo_base[vbo] = 0;
+         buffers[vbo] = rsrc->bo->ptr.gpu + vb.buffer_offset;
+         buf_sizes[vbo] = rsrc->layout.size_B - vb.buffer_offset;
       }
    }
+
+   for (unsigned i = 0; i < PIPE_MAX_ATTRIBS; ++i) {
+      unsigned buffer_size = buf_sizes[attribs->buffers[i]];
+
+      /* Determine the maximum vertex/divided instance index. For robustness,
+       * the index will be clamped to this before reading (if soft fault is
+       * disabled).
+       *
+       * Index i accesses up to (exclusive) offset:
+       *
+       *    src_offset + (i * stride) + elsize_B
+       *
+       * so we require
+       *
+       *    src_offset + (i * stride) + elsize_B <= size
+       *
+       * <==>
+       *
+       *    i <= floor((size - src_offset - elsize_B) / stride)
+       */
+      unsigned elsize_B = util_format_get_blocksize(attribs->key[i].format);
+      unsigned subtracted = attribs->src_offsets[i] + elsize_B;
+
+      if (buffer_size >= subtracted) {
+         /* At least one index is valid, determine the max. If this is zero,
+          * only 1 index is valid.
+          */
+         unsigned max_index =
+            (buffer_size - subtracted) / attribs->key[i].stride;
+
+         batch->uniforms.attrib_base[i] =
+            buffers[attribs->buffers[i]] + attribs->src_offsets[i];
+
+         batch->uniforms.attrib_clamp[i] = max_index;
+      } else {
+         /* No indices are valid. Direct reads to a single zero. */
+         uint32_t zeroes[4] = {0};
+         uint64_t sink = agx_pool_upload_aligned(&batch->pool, &zeroes, 16, 16);
+
+         batch->uniforms.attrib_base[i] = sink;
+         batch->uniforms.attrib_clamp[i] = 0;
+      }
+   }
 }

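As a concrete check of the clamp formula above, with made-up numbers: a
1000-byte binding with src_offset 8, 16-byte elements, and a 16-byte stride
yields an inclusive clamp of 61.

#include <assert.h>
#include <stdio.h>

int
main(void)
{
   /* Hypothetical binding: 1000 bytes of RGBA32F (elsize_B = 16) at
    * src_offset = 8 with a 16-byte stride.
    */
   unsigned buffer_size = 1000, src_offset = 8, elsize_B = 16, stride = 16;

   unsigned subtracted = src_offset + elsize_B; /* 24 bytes used by index 0 */
   assert(buffer_size >= subtracted);           /* at least index 0 is valid */

   /* i <= floor((size - src_offset - elsize_B) / stride) */
   unsigned max_index = (buffer_size - subtracted) / stride; /* 976/16 = 61 */

   /* Index 61 reads bytes [984, 1000): in bounds. Index 62 would read up to
    * byte 1016, past the end, hence the inclusive clamp of 61.
    */
   printf("attrib_clamp = %u\n", max_index);
   return 0;
}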