mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-20 05:40:39 +02:00
asahi: Implement multidraw indirect
GS only for now (inserting a passthru GS if needed). This should be optimized later, but it's ~correct. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26614>
This commit is contained in:
parent
5987e47a29
commit
0fa7252d8a
12 changed files with 383 additions and 60 deletions
|
|
@ -228,7 +228,7 @@ GL 4.5, GLSL 4.50 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, v
|
|||
GL 4.6, GLSL 4.60 -- all DONE: radeonsi, virgl, zink, iris, crocus/gen7+, d3d12
|
||||
|
||||
GL_ARB_gl_spirv DONE (freedreno, llvmpipe)
|
||||
GL_ARB_indirect_parameters DONE (freedreno/a6xx+, nvc0, llvmpipe, virgl)
|
||||
GL_ARB_indirect_parameters DONE (freedreno/a6xx+, nvc0, llvmpipe, virgl, asahi)
|
||||
GL_ARB_pipeline_statistics_query DONE (freedreno/a6xx+, nvc0, r600, llvmpipe, softpipe, crocus/gen6+)
|
||||
GL_ARB_polygon_offset_clamp DONE (freedreno, nv50, nvc0, r600, llvmpipe, v3d, panfrost, crocus)
|
||||
GL_ARB_shader_atomic_counter_ops DONE (freedreno/a5xx+, nvc0, r600, llvmpipe, softpipe, v3d)
|
||||
|
|
|
|||
|
|
@ -12,3 +12,4 @@ GL_ARB_base_instance on Asahi
|
|||
OpenGL 4.6 (up from 4.2) on d3d12
|
||||
VK_EXT_depth_clamp_zero_one on RADV
|
||||
GL_ARB_shader_texture_image_samples on Asahi
|
||||
GL_ARB_indirect_parameters on Asahi
|
||||
|
|
|
|||
|
|
@ -215,6 +215,7 @@ compile(void *memctx, const uint32_t *spirv, size_t spirv_size)
|
|||
|
||||
NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);
|
||||
NIR_PASS_V(nir, nir_opt_if, nir_opt_if_aggressive_last_continue);
|
||||
NIR_PASS_V(nir, nir_opt_idiv_const, 16);
|
||||
|
||||
optimize(nir);
|
||||
|
||||
|
|
|
|||
|
|
@ -972,13 +972,12 @@ link_libagx(nir_shader *nir, const nir_shader *libagx)
|
|||
|
||||
void
|
||||
agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
|
||||
bool rasterizer_discard, nir_shader **gs_count,
|
||||
nir_shader **gs_copy, nir_shader **pre_gs,
|
||||
enum mesa_prim *out_mode, unsigned *out_count_words)
|
||||
struct agx_ia_key *ia, bool rasterizer_discard,
|
||||
nir_shader **gs_count, nir_shader **gs_copy,
|
||||
nir_shader **pre_gs, enum mesa_prim *out_mode,
|
||||
unsigned *out_count_words)
|
||||
{
|
||||
link_libagx(vs, libagx);
|
||||
NIR_PASS_V(vs, nir_lower_idiv,
|
||||
&(const nir_lower_idiv_options){.allow_fp16 = true});
|
||||
|
||||
/* Collect output component counts so we can size the geometry output buffer
|
||||
* appropriately, instead of assuming everything is vec4.
|
||||
|
|
@ -1037,6 +1036,17 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
|
|||
NIR_PASS(progress, gs, nir_opt_loop_unroll);
|
||||
} while (progress);
|
||||
|
||||
if (ia->indirect_multidraw)
|
||||
NIR_PASS_V(gs, agx_nir_lower_multidraw, ia);
|
||||
|
||||
NIR_PASS_V(gs, nir_shader_intrinsics_pass, lower_id,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
||||
link_libagx(gs, libagx);
|
||||
|
||||
NIR_PASS_V(gs, nir_lower_idiv,
|
||||
&(const nir_lower_idiv_options){.allow_fp16 = true});
|
||||
|
||||
/* All those variables we created should've gone away by now */
|
||||
NIR_PASS_V(gs, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
|
||||
|
|
@ -1156,14 +1166,22 @@ agx_nir_prefix_sum_gs(const nir_shader *libagx, unsigned words)
|
|||
}
|
||||
|
||||
nir_shader *
|
||||
agx_nir_gs_setup_indirect(const nir_shader *libagx, enum mesa_prim prim)
|
||||
agx_nir_gs_setup_indirect(const nir_shader *libagx, enum mesa_prim prim,
|
||||
bool multidraw)
|
||||
{
|
||||
nir_builder b = nir_builder_init_simple_shader(
|
||||
MESA_SHADER_COMPUTE, &agx_nir_options, "GS indirect setup");
|
||||
|
||||
libagx_gs_setup_indirect(&b, nir_load_geometry_param_buffer_agx(&b),
|
||||
nir_load_input_assembly_buffer_agx(&b),
|
||||
nir_imm_int(&b, prim));
|
||||
if (multidraw) {
|
||||
uint32_t subgroup_size = 32;
|
||||
b.shader->info.workgroup_size[0] = subgroup_size;
|
||||
}
|
||||
|
||||
libagx_gs_setup_indirect(
|
||||
&b, nir_load_geometry_param_buffer_agx(&b),
|
||||
nir_load_input_assembly_buffer_agx(&b), nir_imm_int(&b, prim),
|
||||
nir_channel(&b, nir_load_local_invocation_id(&b), 0),
|
||||
nir_imm_bool(&b, multidraw));
|
||||
|
||||
UNUSED struct agx_uncompiled_shader_info info;
|
||||
agx_preprocess_nir(b.shader, libagx, false, &info);
|
||||
|
|
|
|||
|
|
@ -14,16 +14,19 @@ enum mesa_prim;
|
|||
|
||||
void agx_nir_lower_ia(struct nir_shader *s, struct agx_ia_key *ia);
|
||||
|
||||
void agx_nir_lower_multidraw(struct nir_shader *s, struct agx_ia_key *key);
|
||||
|
||||
void agx_nir_lower_gs(struct nir_shader *gs, struct nir_shader *vs,
|
||||
const struct nir_shader *libagx, bool rasterizer_discard,
|
||||
struct nir_shader **gs_count, struct nir_shader **gs_copy,
|
||||
struct nir_shader **pre_gs, enum mesa_prim *out_mode,
|
||||
unsigned *out_count_words);
|
||||
const struct nir_shader *libagx, struct agx_ia_key *ia,
|
||||
bool rasterizer_discard, struct nir_shader **gs_count,
|
||||
struct nir_shader **gs_copy, struct nir_shader **pre_gs,
|
||||
enum mesa_prim *out_mode, unsigned *out_count_words);
|
||||
|
||||
struct nir_shader *agx_nir_prefix_sum_gs(const struct nir_shader *libagx,
|
||||
unsigned words);
|
||||
|
||||
struct nir_shader *agx_nir_gs_setup_indirect(const struct nir_shader *libagx,
|
||||
enum mesa_prim prim);
|
||||
enum mesa_prim prim,
|
||||
bool multidraw);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -6,10 +6,26 @@
|
|||
#include "asahi/compiler/agx_compile.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "shaders/geometry.h"
|
||||
#include "util/compiler.h"
|
||||
#include "agx_nir_lower_gs.h"
|
||||
#include "libagx_shaders.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
#include "nir_intrinsics.h"
|
||||
|
||||
/*
|
||||
* This file implements input assembly in software for geometry/tessellation
|
||||
* shaders. load_vertex_id is lowered based on the topology. Most of the logic
|
||||
* lives in CL library routines.
|
||||
*
|
||||
* When geom/tess is used, multidraw indirect is implemented by:
|
||||
*
|
||||
* 1. Prefix summing the vertex counts across draws.
|
||||
* 2. Issuing a single indirect draw for the summed vertices.
|
||||
* 3. Binary searching the prefix sum buffer in software index fetch.
|
||||
*
|
||||
* This multidraw implementation kicks off the prefix sum and lowered draw.
|
||||
*/
|
||||
|
||||
static nir_def *
|
||||
load_vertex_id(nir_builder *b, struct agx_ia_key *key)
|
||||
|
|
@ -24,9 +40,20 @@ load_vertex_id(nir_builder *b, struct agx_ia_key *key)
|
|||
* vertex ID is just the index as-is.
|
||||
*/
|
||||
if (key->index_size) {
|
||||
nir_def *ia = nir_load_input_assembly_buffer_agx(b);
|
||||
|
||||
/*
|
||||
* For multidraw, apply the index buffer offset. For !multidraw, this is
|
||||
* handled ahead-of-time and baked into the index buffer pointer.
|
||||
*/
|
||||
if (key->indirect_multidraw) {
|
||||
nir_def *first = libagx_multidraw_param(b, ia, nir_load_draw_id(b),
|
||||
nir_imm_int(b, 2));
|
||||
id = nir_iadd(b, id, first);
|
||||
}
|
||||
|
||||
nir_def *address =
|
||||
libagx_index_buffer(b, nir_load_input_assembly_buffer_agx(b), id,
|
||||
nir_imm_int(b, key->index_size));
|
||||
libagx_index_buffer(b, ia, id, nir_imm_int(b, key->index_size));
|
||||
|
||||
nir_def *index = nir_load_global_constant(b, address, key->index_size, 1,
|
||||
key->index_size * 8);
|
||||
|
|
@ -53,9 +80,95 @@ lower_vertex_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
}
|
||||
|
||||
void
|
||||
agx_nir_lower_ia(nir_shader *s, struct agx_ia_key *ia)
|
||||
agx_nir_lower_ia(nir_shader *s, struct agx_ia_key *key)
|
||||
{
|
||||
nir_shader_intrinsics_pass(s, lower_vertex_id,
|
||||
nir_metadata_block_index | nir_metadata_dominance,
|
||||
ia);
|
||||
key);
|
||||
}
|
||||
|
||||
struct multidraw_state {
|
||||
nir_def *raw_id, *draw, *primitive, *first_vertex, *base_instance;
|
||||
nir_def *num_vertices;
|
||||
|
||||
bool indexed;
|
||||
};
|
||||
|
||||
static nir_def *
|
||||
map_multidraw_param(nir_builder *b, nir_intrinsic_op intrin,
|
||||
struct multidraw_state *state)
|
||||
{
|
||||
switch (intrin) {
|
||||
case nir_intrinsic_load_draw_id:
|
||||
return state->draw;
|
||||
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
return state->primitive;
|
||||
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
return state->indexed ? state->first_vertex : nir_imm_int(b, 0);
|
||||
|
||||
case nir_intrinsic_load_first_vertex:
|
||||
return state->first_vertex;
|
||||
|
||||
case nir_intrinsic_load_base_instance:
|
||||
return state->base_instance;
|
||||
|
||||
case nir_intrinsic_load_num_vertices:
|
||||
return state->num_vertices;
|
||||
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_multidraw(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
nir_def *id = map_multidraw_param(b, intr->intrinsic, data);
|
||||
if (!id)
|
||||
return false;
|
||||
|
||||
nir_instr_remove(&intr->instr);
|
||||
nir_def_rewrite_uses(&intr->def, id);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
agx_nir_lower_multidraw(nir_shader *s, struct agx_ia_key *key)
|
||||
{
|
||||
assert(key->indirect_multidraw);
|
||||
|
||||
nir_builder b_ =
|
||||
nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
|
||||
nir_builder *b = &b_;
|
||||
|
||||
struct multidraw_state state = {
|
||||
/* Filled in at the end to avoid recursion */
|
||||
.raw_id = nir_undef(b, 1, 32),
|
||||
.indexed = key->index_size > 0,
|
||||
};
|
||||
|
||||
nir_def *ia = nir_load_input_assembly_buffer_agx(b);
|
||||
state.draw = libagx_multidraw_draw_id(b, ia, state.raw_id);
|
||||
|
||||
state.primitive = libagx_multidraw_primitive_id(
|
||||
b, ia, state.draw, state.raw_id, nir_imm_int(b, key->mode));
|
||||
|
||||
state.num_vertices =
|
||||
libagx_multidraw_param(b, ia, state.draw, nir_imm_int(b, 0));
|
||||
|
||||
state.first_vertex = libagx_multidraw_param(
|
||||
b, ia, state.draw, nir_imm_int(b, state.indexed ? 3 : 2));
|
||||
|
||||
state.base_instance = libagx_multidraw_param(
|
||||
b, ia, state.draw, nir_imm_int(b, state.indexed ? 4 : 3));
|
||||
|
||||
nir_shader_intrinsics_pass(b->shader, lower_multidraw,
|
||||
nir_metadata_block_index | nir_metadata_dominance,
|
||||
&state);
|
||||
|
||||
b->cursor = nir_before_impl(b->impl);
|
||||
nir_def_rewrite_uses(state.raw_id, nir_load_primitive_id(b));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -110,6 +110,40 @@ libagx_index_buffer(constant struct agx_ia_state *p, uint id,
|
|||
return (uintptr_t)&p->index_buffer[id * index_size];
|
||||
}
|
||||
|
||||
uint
|
||||
libagx_multidraw_draw_id(constant struct agx_ia_state *p, uint raw_id)
|
||||
{
|
||||
global uint *sums = p->prefix_sums;
|
||||
|
||||
/* TODO: replace with binary search or interpolation search */
|
||||
uint i = 0;
|
||||
for (i = 0; raw_id >= sums[i]; ++i)
|
||||
;
|
||||
return i;
|
||||
}
|
||||
|
||||
uint
|
||||
libagx_multidraw_param(constant struct agx_ia_state *p, uint draw_id, uint word)
|
||||
{
|
||||
global uint *draw = (global uint *)(p->draws + (draw_id * p->draw_stride));
|
||||
return draw[word];
|
||||
}
|
||||
|
||||
uint
|
||||
libagx_multidraw_primitive_id(constant struct agx_ia_state *p, uint draw_id,
|
||||
uint raw_id, enum mesa_prim mode)
|
||||
{
|
||||
uint start = draw_id > 0 ? p->prefix_sums[draw_id - 1] : 0;
|
||||
uint raw_offset = raw_id - start;
|
||||
|
||||
/* Note: if we wanted, we could precompute magic divisors in the setup kernel
|
||||
* to avoid the non-constant division here.
|
||||
*/
|
||||
uint vertex_count = libagx_multidraw_param(p, draw_id, 0);
|
||||
uint primitive_count = u_decomposed_prims_for_vertices(mode, vertex_count);
|
||||
return raw_offset % primitive_count;
|
||||
}
|
||||
|
||||
uint
|
||||
libagx_setup_xfb_buffer(global struct agx_geometry_params *p, uint i)
|
||||
{
|
||||
|
|
@ -208,12 +242,61 @@ process_draw(global uint *draw, enum mesa_prim mode)
|
|||
return (uint2)(prim_per_instance, instance_count);
|
||||
}
|
||||
|
||||
uint2
|
||||
process_multidraw(global struct agx_ia_state *s, uint local_id,
|
||||
enum mesa_prim mode)
|
||||
{
|
||||
uintptr_t draw_ptr = s->draws;
|
||||
uint draw_stride = s->draw_stride;
|
||||
|
||||
/* Prefix sum the vertex counts (multiplied by instance counts) across draws.
|
||||
* The number of draws is expected to be small, so this serialization should
|
||||
* be ok in practice. See libagx_prefix_sum for algorithm details.
|
||||
*/
|
||||
uint i, count = 0;
|
||||
uint len = *(s->count);
|
||||
uint len_remainder = len % 32;
|
||||
uint len_rounded_down = len - len_remainder;
|
||||
|
||||
for (i = local_id; i < len_rounded_down; i += 32) {
|
||||
global uint *draw_ = (global uint *)(draw_ptr + (i * draw_stride));
|
||||
uint2 draw = process_draw(draw_, mode);
|
||||
|
||||
/* Total primitives */
|
||||
uint value = draw.x * draw.y;
|
||||
|
||||
/* TODO: use inclusive once that's wired up */
|
||||
uint value_prefix_sum = sub_group_scan_exclusive_add(value) + value;
|
||||
s->prefix_sums[i] = count + value_prefix_sum;
|
||||
count += sub_group_broadcast(value_prefix_sum, 31);
|
||||
}
|
||||
|
||||
if (local_id < len_remainder) {
|
||||
global uint *draw_ = (global uint *)(draw_ptr + (i * draw_stride));
|
||||
uint2 draw = process_draw(draw_, mode);
|
||||
uint value = draw.x * draw.y;
|
||||
|
||||
/* TODO: use inclusive once that's wired up */
|
||||
s->prefix_sums[i] = count + sub_group_scan_exclusive_add(value) + value;
|
||||
}
|
||||
|
||||
return (uint2)(len > 0 ? s->prefix_sums[len - 1] : 0, 1);
|
||||
}
|
||||
|
||||
void
|
||||
libagx_gs_setup_indirect(global struct agx_geometry_params *p,
|
||||
global struct agx_ia_state *ia, enum mesa_prim mode)
|
||||
global struct agx_ia_state *ia, enum mesa_prim mode,
|
||||
uint local_id, bool multidraw)
|
||||
{
|
||||
/* Determine the (primitives, instances) grid size. */
|
||||
uint2 draw = process_draw(p->input_indirect_desc, mode);
|
||||
/* Determine the (primitives, instances) grid size. For multidraw, this will
|
||||
* be a synthetic grid for the entire collection, but that's ok.
|
||||
*/
|
||||
uint2 draw = multidraw ? process_multidraw(ia, local_id, mode)
|
||||
: process_draw((global uint *)ia->draws, mode);
|
||||
|
||||
/* Elect a single lane */
|
||||
if (multidraw && local_id != 0)
|
||||
return;
|
||||
|
||||
/* There are primitives*instances primitives total */
|
||||
p->input_primitives = draw.x * draw.y;
|
||||
|
|
@ -227,9 +310,12 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p,
|
|||
* in elements. Apply that offset now that we have it. For a hardware
|
||||
* indirect draw, the hardware would do this for us, but for software input
|
||||
* assembly we need to do it ourselves.
|
||||
*
|
||||
* For multidraw, this happens per-draw in the input assembly instead. We
|
||||
* could do that for non-multidraw too, but it'd be less efficient.
|
||||
*/
|
||||
if (ia->index_buffer) {
|
||||
ia->index_buffer += p->input_indirect_desc[2] * ia->index_size_B;
|
||||
if (ia->index_buffer && !multidraw) {
|
||||
ia->index_buffer += ((constant uint *)ia->draws)[2] * ia->index_size_B;
|
||||
}
|
||||
|
||||
/* We may need to allocate a GS count buffer, do so now */
|
||||
|
|
|
|||
|
|
@ -10,9 +10,11 @@
|
|||
#ifndef __OPENCL_VERSION__
|
||||
#include "util/macros.h"
|
||||
#define GLOBAL(type_) uint64_t
|
||||
#define CONST(type_) uint64_t
|
||||
#else
|
||||
#define PACKED
|
||||
#define GLOBAL(type_) global type_ *
|
||||
#define CONST(type_) constant type_ *
|
||||
#endif
|
||||
|
||||
#ifndef LIBAGX_GEOMETRY_H
|
||||
|
|
@ -30,11 +32,33 @@ struct agx_ia_key {
|
|||
|
||||
/* Use first vertex as the provoking vertex for flat shading */
|
||||
bool flatshade_first;
|
||||
|
||||
/* Whether we are doing input assembly for an indirect multidraw that is
|
||||
* implemented by a single superdraw with a prefix sum of vertex counts per
|
||||
* draw. This requires lowering lots of sysvals to index into the draw
|
||||
* descriptors according to the associated dynamic multidraw state.
|
||||
*/
|
||||
bool indirect_multidraw;
|
||||
};
|
||||
|
||||
struct agx_ia_state {
|
||||
/* Input: index buffer if present. */
|
||||
GLOBAL(uchar) index_buffer;
|
||||
CONST(uchar) index_buffer;
|
||||
|
||||
/* Input: draw count */
|
||||
CONST(uint) count;
|
||||
|
||||
/* Input: indirect draw descriptor. Raw pointer since it's strided. */
|
||||
uint64_t draws;
|
||||
|
||||
/* For the geom/tess path, this is the temporary prefix sum buffer.
|
||||
* Caller-allocated. For regular MDI, this is ok since the CPU knows the
|
||||
* worst-case draw count.
|
||||
*/
|
||||
GLOBAL(uint) prefix_sums;
|
||||
|
||||
/* Stride for the draw descrptor array */
|
||||
uint32_t draw_stride;
|
||||
|
||||
/* The index size (1, 2, 4) or 0 if drawing without an index buffer. */
|
||||
uint8_t index_size_B;
|
||||
|
|
@ -89,9 +113,6 @@ struct agx_geometry_params {
|
|||
*/
|
||||
uint32_t xfb_prims[MAX_VERTEX_STREAMS];
|
||||
|
||||
/* Address of input indirect buffer for indirect GS draw */
|
||||
GLOBAL(uint) input_indirect_desc;
|
||||
|
||||
/* Within an indirect GS draw, the grid used to dispatch the GS written out
|
||||
* by the GS indirect setup kernel. Unused for direct GS draws.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -117,7 +117,8 @@ load_texture_handle(nir_builder *b, nir_intrinsic_instr *intr, void *base)
|
|||
}
|
||||
|
||||
static nir_def *
|
||||
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
|
||||
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
bool lower_draw_params)
|
||||
{
|
||||
struct agx_draw_uniforms *u = NULL;
|
||||
struct agx_stage_uniforms *s = NULL;
|
||||
|
|
@ -151,6 +152,20 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_get_ssbo_size:
|
||||
return load_sysval_indirect(b, 1, 32, stage_table(b), &s->ssbo_size,
|
||||
intr->src[0].ssa);
|
||||
case nir_intrinsic_load_layer_id_written_agx:
|
||||
return load_sysval_root(b, 1, 16, &u->layer_id_written);
|
||||
case nir_intrinsic_load_input_assembly_buffer_agx:
|
||||
return load_sysval_root(b, 1, 64, &u->input_assembly);
|
||||
case nir_intrinsic_load_geometry_param_buffer_agx:
|
||||
return load_sysval_root(b, 1, 64, &u->geometry_params);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!lower_draw_params)
|
||||
return NULL;
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
return load_sysval(b, 3, 32, AGX_SYSVAL_TABLE_GRID, 0);
|
||||
case nir_intrinsic_load_first_vertex:
|
||||
|
|
@ -166,12 +181,6 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
|
|||
load_sysval(b, 1, 32, AGX_SYSVAL_TABLE_PARAMS, 0), nir_imm_int(b, 0));
|
||||
case nir_intrinsic_load_draw_id:
|
||||
return load_sysval_root(b, 1, 32, &u->draw_id);
|
||||
case nir_intrinsic_load_layer_id_written_agx:
|
||||
return load_sysval_root(b, 1, 16, &u->layer_id_written);
|
||||
case nir_intrinsic_load_input_assembly_buffer_agx:
|
||||
return load_sysval_root(b, 1, 64, &u->input_assembly);
|
||||
case nir_intrinsic_load_geometry_param_buffer_agx:
|
||||
return load_sysval_root(b, 1, 64, &u->geometry_params);
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -181,6 +190,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
|
|||
static bool
|
||||
lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
bool *lower_draw_params = data;
|
||||
b->cursor = nir_before_instr(instr);
|
||||
nir_def *old;
|
||||
nir_def *replacement = NULL;
|
||||
|
|
@ -188,7 +198,7 @@ lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
|
|||
if (instr->type == nir_instr_type_intrinsic) {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
old = &intr->def;
|
||||
replacement = lower_intrinsic(b, intr);
|
||||
replacement = lower_intrinsic(b, intr, *lower_draw_params);
|
||||
} else if (instr->type == nir_instr_type_tex) {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
old = &tex->def;
|
||||
|
|
@ -353,11 +363,11 @@ lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
|
|||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_sysvals(nir_shader *shader)
|
||||
agx_nir_lower_sysvals(nir_shader *shader, bool lower_draw_params)
|
||||
{
|
||||
return nir_shader_instructions_pass(
|
||||
shader, lower_sysvals, nir_metadata_block_index | nir_metadata_dominance,
|
||||
NULL);
|
||||
&lower_draw_params);
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -1643,6 +1643,8 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_SAMPLE_SHADING:
|
||||
case PIPE_CAP_START_INSTANCE:
|
||||
case PIPE_CAP_DRAW_PARAMETERS:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT:
|
||||
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
|
||||
return 1;
|
||||
case PIPE_CAP_SURFACE_SAMPLE_COUNT:
|
||||
/* TODO: MSRTT */
|
||||
|
|
|
|||
|
|
@ -1645,7 +1645,7 @@ agx_compile_nir(struct agx_device *dev, nir_shader *nir,
|
|||
dev->params.num_dies > 1;
|
||||
key.libagx = dev->libagx;
|
||||
|
||||
NIR_PASS_V(nir, agx_nir_lower_sysvals);
|
||||
NIR_PASS_V(nir, agx_nir_lower_sysvals, true);
|
||||
NIR_PASS_V(nir, agx_nir_layout_uniforms, compiled, &key.reserved_preamble);
|
||||
|
||||
agx_compile_shader_nir(nir, &key, debug, &binary, &compiled->info);
|
||||
|
|
@ -1712,20 +1712,18 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
|||
|
||||
/* Apply the VS key to the VS before linking it in */
|
||||
NIR_PASS_V(vs, agx_nir_lower_vbo, &key->vbuf);
|
||||
NIR_PASS_V(vs, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
NIR_PASS_V(vs, agx_nir_lower_ia, &key->ia);
|
||||
|
||||
NIR_PASS_V(vs, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
|
||||
/* Lower IA before VS sysvals to correctly handle indirect multidraws */
|
||||
agx_nir_lower_ia(vs, &key->ia);
|
||||
|
||||
/* Lower VS sysvals before it's merged in, so we access the correct shader
|
||||
* stage for UBOs etc.
|
||||
* stage for UBOs etc. Skip draw parameters, those are lowered later.
|
||||
*/
|
||||
NIR_PASS_V(vs, agx_nir_lower_sysvals);
|
||||
NIR_PASS_V(vs, agx_nir_lower_sysvals, false);
|
||||
|
||||
/* Link VS with GS */
|
||||
NIR_PASS_V(nir, agx_nir_lower_gs, vs, dev->libagx,
|
||||
NIR_PASS_V(nir, agx_nir_lower_gs, vs, dev->libagx, &key->ia,
|
||||
key->rasterizer_discard, &gs_count, &gs_copy, &pre_gs,
|
||||
&gs_out_prim, &gs_out_count_words);
|
||||
ralloc_free(vs);
|
||||
|
|
@ -2178,7 +2176,8 @@ ia_needs_provoking(enum mesa_prim prim)
|
|||
}
|
||||
|
||||
static bool
|
||||
agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info)
|
||||
agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect)
|
||||
{
|
||||
/* Only proceed if there is a geometry shader. Due to input assembly
|
||||
* dependence, we don't bother to dirty track right now.
|
||||
|
|
@ -2196,6 +2195,8 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info)
|
|||
.ia.mode = translate_ia_mode(info->mode),
|
||||
.ia.flatshade_first =
|
||||
ia_needs_provoking(info->mode) && ctx->rast->base.flatshade_first,
|
||||
.ia.indirect_multidraw =
|
||||
indirect && indirect->indirect_draw_count != NULL,
|
||||
|
||||
.rasterizer_discard = ctx->rast->base.rasterizer_discard,
|
||||
};
|
||||
|
|
@ -3403,6 +3404,25 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
|||
.index_size_B = info->index_size,
|
||||
};
|
||||
|
||||
if (indirect) {
|
||||
struct agx_resource *rsrc = agx_resource(indirect->buffer);
|
||||
agx_batch_reads(batch, rsrc);
|
||||
|
||||
ia.draws = rsrc->bo->ptr.gpu + indirect->offset;
|
||||
}
|
||||
|
||||
if (indirect && indirect->indirect_draw_count) {
|
||||
struct agx_resource *rsrc = agx_resource(indirect->indirect_draw_count);
|
||||
agx_batch_reads(batch, rsrc);
|
||||
|
||||
ia.count = rsrc->bo->ptr.gpu + indirect->indirect_draw_count_offset;
|
||||
ia.draw_stride = indirect->stride;
|
||||
|
||||
size_t max_sum_size = sizeof(uint32_t) * indirect->draw_count;
|
||||
ia.prefix_sums =
|
||||
agx_pool_alloc_aligned(&batch->pool, max_sum_size, 4).gpu;
|
||||
}
|
||||
|
||||
batch->uniforms.input_assembly =
|
||||
agx_pool_upload_aligned(&batch->pool, &ia, sizeof(ia), 8);
|
||||
|
||||
|
|
@ -3448,10 +3468,7 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
|||
unsigned count_buffer_stride = batch->ctx->gs->gs_count_words * 4;
|
||||
|
||||
if (indirect) {
|
||||
struct agx_resource *rsrc = agx_resource(indirect->buffer);
|
||||
params.input_indirect_desc = rsrc->bo->ptr.gpu + indirect->offset;
|
||||
params.count_buffer_stride = count_buffer_stride;
|
||||
agx_batch_reads(batch, rsrc);
|
||||
} else {
|
||||
unsigned prim_per_instance =
|
||||
u_decomposed_prims_for_vertices(info->mode, draw->count);
|
||||
|
|
@ -3498,20 +3515,23 @@ agx_launch_gs(struct agx_batch *batch, const struct pipe_draw_info *info,
|
|||
if (indirect) {
|
||||
assert(indirect->buffer && "drawauto already handled");
|
||||
|
||||
if (!ctx->gs_setup_indirect[info->mode]) {
|
||||
bool multidraw = (indirect->indirect_draw_count != NULL);
|
||||
|
||||
if (!ctx->gs_setup_indirect[info->mode][multidraw]) {
|
||||
struct agx_shader_key base_key = {0};
|
||||
|
||||
ctx->gs_setup_indirect[info->mode] = agx_compile_nir(
|
||||
dev, agx_nir_gs_setup_indirect(dev->libagx, info->mode), &base_key,
|
||||
NULL);
|
||||
ctx->gs_setup_indirect[info->mode][multidraw] = agx_compile_nir(
|
||||
dev, agx_nir_gs_setup_indirect(dev->libagx, info->mode, multidraw),
|
||||
&base_key, NULL);
|
||||
}
|
||||
|
||||
const struct pipe_grid_info grid_1x1 = {
|
||||
.block = {1, 1, 1},
|
||||
const struct pipe_grid_info grid_setup = {
|
||||
.block = {multidraw ? 32 : 1, 1, 1},
|
||||
.grid = {1, 1, 1},
|
||||
};
|
||||
|
||||
agx_launch(batch, &grid_1x1, ctx->gs_setup_indirect[info->mode],
|
||||
agx_launch(batch, &grid_setup,
|
||||
ctx->gs_setup_indirect[info->mode][multidraw],
|
||||
PIPE_SHADER_COMPUTE);
|
||||
|
||||
/* Wrap the pool allocation in a fake resource for meta-Gallium use */
|
||||
|
|
@ -3651,6 +3671,12 @@ agx_needs_passthrough_gs(struct agx_context *ctx,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* TODO: also sloppy, we should generate VDM commands from a shader */
|
||||
if (indirect && indirect->indirect_draw_count) {
|
||||
perf_debug_ctx(ctx, "Using passthrough GS due to multidraw indirect");
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Transform feedback is layered on geometry shaders, so if transform
|
||||
* feedback is used, we need a GS.
|
||||
*/
|
||||
|
|
@ -3741,6 +3767,38 @@ agx_apply_passthrough_gs(struct agx_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
util_draw_multi_unroll_indirect(struct pipe_context *pctx,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draws)
|
||||
{
|
||||
for (unsigned i = 0; i < indirect->draw_count; ++i) {
|
||||
const struct pipe_draw_indirect_info subindirect = {
|
||||
.buffer = indirect->buffer,
|
||||
.count_from_stream_output = indirect->count_from_stream_output,
|
||||
.offset = indirect->offset + (i * indirect->stride),
|
||||
.draw_count = 1,
|
||||
};
|
||||
|
||||
pctx->draw_vbo(pctx, info, i, &subindirect, draws, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
util_draw_multi_upload_indirect(struct pipe_context *pctx,
|
||||
const struct pipe_draw_info *info,
|
||||
const struct pipe_draw_indirect_info *indirect,
|
||||
const struct pipe_draw_start_count_bias *draws)
|
||||
{
|
||||
struct pipe_draw_indirect_info indirect_ = *indirect;
|
||||
u_upload_data(pctx->const_uploader, 0, 4, 4, &indirect->draw_count,
|
||||
&indirect_.indirect_draw_count_offset,
|
||||
&indirect_.indirect_draw_count);
|
||||
|
||||
pctx->draw_vbo(pctx, info, 0, &indirect_, draws, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
||||
unsigned drawid_offset,
|
||||
|
|
@ -3757,6 +3815,14 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
return;
|
||||
}
|
||||
|
||||
if (indirect && indirect->draw_count > 1 && !indirect->indirect_draw_count) {
|
||||
assert(drawid_offset == 0);
|
||||
assert(num_draws == 1);
|
||||
|
||||
util_draw_multi_upload_indirect(pctx, info, indirect, draws);
|
||||
return;
|
||||
}
|
||||
|
||||
if (indirect && indirect->count_from_stream_output) {
|
||||
agx_draw_vbo_from_xfb(pctx, info, drawid_offset, indirect);
|
||||
return;
|
||||
|
|
@ -3824,7 +3890,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
(ctx->dirty & AGX_DIRTY_VERTEX))
|
||||
ctx->dirty |= AGX_DIRTY_VS;
|
||||
|
||||
agx_update_gs(ctx, info);
|
||||
agx_update_gs(ctx, info, indirect);
|
||||
|
||||
if (ctx->gs) {
|
||||
batch->geom_indirect = agx_pool_alloc_aligned_with_bo(
|
||||
|
|
@ -3933,6 +3999,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
return;
|
||||
}
|
||||
|
||||
assert((!indirect || !indirect->indirect_draw_count) && "multidraw handled");
|
||||
|
||||
/* Update batch masks based on current state */
|
||||
if (ctx->dirty & AGX_DIRTY_BLEND) {
|
||||
/* TODO: Any point to tracking load? */
|
||||
|
|
|
|||
|
|
@ -509,7 +509,7 @@ struct agx_context {
|
|||
struct util_dynarray global_buffers;
|
||||
|
||||
struct agx_compiled_shader *gs_prefix_sums[16];
|
||||
struct agx_compiled_shader *gs_setup_indirect[MESA_PRIM_MAX];
|
||||
struct agx_compiled_shader *gs_setup_indirect[MESA_PRIM_MAX][2];
|
||||
struct agx_meta_cache meta;
|
||||
|
||||
uint32_t syncobj;
|
||||
|
|
@ -795,7 +795,7 @@ void agx_upload_uniforms(struct agx_batch *batch);
|
|||
uint64_t agx_upload_stage_uniforms(struct agx_batch *batch, uint64_t textures,
|
||||
enum pipe_shader_type stage);
|
||||
|
||||
bool agx_nir_lower_sysvals(nir_shader *shader);
|
||||
bool agx_nir_lower_sysvals(nir_shader *shader, bool lower_draw_params);
|
||||
|
||||
bool agx_nir_layout_uniforms(nir_shader *shader,
|
||||
struct agx_compiled_shader *compiled,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue