mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 04:40:09 +01:00
hk,libagx: handle adjacency without a GS
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32759>
This commit is contained in:
parent
872487919b
commit
5bc89aa991
10 changed files with 228 additions and 35 deletions
|
|
@ -62,6 +62,9 @@ struct agx_vs_prolog_key {
|
|||
/* If !hw and the draw call is indexed, the index size */
|
||||
uint8_t sw_index_size_B;
|
||||
|
||||
/* Adjacency primitive to emulate */
|
||||
enum mesa_prim adjacency;
|
||||
|
||||
/* Robustness settings for the vertex fetch */
|
||||
struct agx_robustness robustness;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -23,6 +23,9 @@ struct nir_def *agx_load_per_vertex_input(struct nir_builder *b,
|
|||
nir_intrinsic_instr *intr,
|
||||
struct nir_def *vertex);
|
||||
|
||||
nir_def *agx_nir_load_vertex_id(struct nir_builder *b, nir_def *id,
|
||||
unsigned index_size_B);
|
||||
|
||||
bool agx_nir_lower_sw_vs(struct nir_shader *s, unsigned index_size_B);
|
||||
|
||||
bool agx_nir_lower_vs_before_gs(struct nir_shader *vs,
|
||||
|
|
|
|||
|
|
@ -14,11 +14,9 @@
|
|||
* vertex shaders, as part of geometry/tessellation lowering. It does not apply
|
||||
* the topology, which happens in the geometry shader.
|
||||
*/
|
||||
static nir_def *
|
||||
load_vertex_id(nir_builder *b, unsigned index_size_B)
|
||||
nir_def *
|
||||
agx_nir_load_vertex_id(nir_builder *b, nir_def *id, unsigned index_size_B)
|
||||
{
|
||||
nir_def *id = nir_channel(b, nir_load_global_invocation_id(b, 32), 0);
|
||||
|
||||
/* If drawing with an index buffer, pull the vertex ID. Otherwise, the
|
||||
* vertex ID is just the index as-is.
|
||||
*/
|
||||
|
|
@ -40,7 +38,8 @@ lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_load_vertex_id) {
|
||||
nir_def_replace(&intr->def, load_vertex_id(b, *index_size_B));
|
||||
nir_def *id = nir_channel(b, nir_load_global_invocation_id(b, 32), 0);
|
||||
nir_def_replace(&intr->def, agx_nir_load_vertex_id(b, id, *index_size_B));
|
||||
return true;
|
||||
} else if (intr->intrinsic == nir_intrinsic_load_instance_id) {
|
||||
nir_def_replace(&intr->def,
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#include "agx_nir_lower_vbo.h"
|
||||
#include "agx_pack.h"
|
||||
#include "agx_tilebuffer.h"
|
||||
#include "libagx.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
|
|
@ -138,6 +139,36 @@ lower_non_monolithic_uniforms(nir_builder *b, nir_intrinsic_instr *intr,
|
|||
}
|
||||
}
|
||||
|
||||
/* Intrinsic-pass callback used to emulate adjacency primitives in the VS
 * prolog. `data` is the struct agx_vs_prolog_key for the prolog being built.
 *
 * Every load_vertex_id is replaced by: a vertex ID load, remapped according
 * to key->adjacency, then passed through agx_nir_load_vertex_id() with
 * key->sw_index_size_B. Other intrinsics are left alone.
 *
 * Returns true if the instruction was rewritten, false otherwise.
 */
static bool
|
||||
lower_adjacency(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
const struct agx_vs_prolog_key *key = data;
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
if (intr->intrinsic != nir_intrinsic_load_vertex_id)
|
||||
return false;
|
||||
|
||||
/* Emitted before the instruction being replaced (see cursor above) */
nir_def *id = nir_load_vertex_id(b);
|
||||
|
||||
/* Remap the ID into the adjacency topology's vertex numbering */
if (key->adjacency == MESA_PRIM_LINES_ADJACENCY) {
|
||||
id = libagx_map_to_line_adj(b, id);
|
||||
} else if (key->adjacency == MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) {
|
||||
id = libagx_map_to_tri_strip_adj(b, id);
|
||||
} else if (key->adjacency == MESA_PRIM_LINE_STRIP_ADJACENCY) {
|
||||
id = libagx_map_to_line_strip_adj(b, id);
|
||||
} else if (key->adjacency == MESA_PRIM_TRIANGLES_ADJACENCY) {
|
||||
/* Sequence (0, 2, 4), (6, 8, 10), ... */
|
||||
id = nir_imul_imm(b, id, 2);
|
||||
} else {
|
||||
unreachable("unknown");
|
||||
}
|
||||
|
||||
/* Apply the software index size to the remapped ID */
id = agx_nir_load_vertex_id(b, id, key->sw_index_size_B);
|
||||
|
||||
nir_def_replace(&intr->def, id);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
agx_nir_vs_prolog(nir_builder *b, const void *key_)
|
||||
{
|
||||
|
|
@ -169,8 +200,17 @@ agx_nir_vs_prolog(nir_builder *b, const void *key_)
|
|||
/* Now lower the resulting program using the key */
|
||||
lower_vbo(b->shader, key->attribs, key->robustness);
|
||||
|
||||
/* Clean up redundant vertex ID loads */
|
||||
if (!key->hw || key->adjacency) {
|
||||
NIR_PASS(_, b->shader, nir_opt_cse);
|
||||
NIR_PASS(_, b->shader, nir_opt_dce);
|
||||
}
|
||||
|
||||
if (!key->hw) {
|
||||
agx_nir_lower_sw_vs(b->shader, key->sw_index_size_B);
|
||||
} else if (key->adjacency) {
|
||||
nir_shader_intrinsics_pass(b->shader, lower_adjacency,
|
||||
nir_metadata_control_flow, (void *)key);
|
||||
}
|
||||
|
||||
/* Finally, lower uniforms according to our ABI */
|
||||
|
|
|
|||
|
|
@ -28,6 +28,34 @@ libagx_predicate_indirect(global uint32_t *out, constant uint32_t *in,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Indexing/offsetting is done in software if necessary, so we strip all
|
||||
* indexing/offset information.
|
||||
*/
|
||||
/* Rewrite an indirect draw of an adjacency primitive into an indirect draw
 * without adjacency.
 *
 * out:  receives the new draw. Only vertexCount (remapped for `prim` by
 *       libagx_remap_adj_count) and instanceCount (copied) are set; the
 *       remaining fields of the compound literal are zero-initialized.
 * in:   the application's indirect draw.
 * ia:   input assembly state; its index buffer fields are updated only when
 *       index_size_B is nonzero.
 * index_buffer, index_buffer_range_el, index_size_B: description of the bound
 *       index buffer; index_size_B == 0 means the index path is skipped.
 * prim: adjacency primitive being emulated.
 *
 * NOTE(review): for indexed draws `in` presumably points at a
 * VkDrawIndexedIndirectCommand, so in->firstVertex would alias firstIndex --
 * confirm against the caller.
 */
KERNEL(1)
|
||||
libagx_draw_without_adj(global VkDrawIndirectCommand *out,
|
||||
global VkDrawIndirectCommand *in,
|
||||
global struct agx_ia_state *ia, uint64_t index_buffer,
|
||||
uint64_t index_buffer_range_el, int index_size_B,
|
||||
enum mesa_prim prim)
|
||||
{
|
||||
*out = (VkDrawIndirectCommand){
|
||||
.vertexCount = libagx_remap_adj_count(in->vertexCount, prim),
|
||||
.instanceCount = in->instanceCount,
|
||||
};
|
||||
|
||||
/* TODO: Deduplicate */
|
||||
if (index_size_B) {
|
||||
/* Offset taken from the draw; applied to both the base and the range */
uint offs = in->firstVertex;
|
||||
|
||||
ia->index_buffer = libagx_index_buffer(
|
||||
index_buffer, index_buffer_range_el, offs, index_size_B, 0);
|
||||
|
||||
ia->index_buffer_range_el =
|
||||
libagx_index_buffer_range_el(index_buffer_range_el, offs);
|
||||
}
|
||||
}
|
||||
|
||||
/* Precondition: len must be < the group size */
|
||||
static void
|
||||
libagx_memcpy_small(global uchar *dst, constant uchar *src, uint len, uint tid)
|
||||
|
|
|
|||
|
|
@ -220,6 +220,50 @@ vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, uint prim,
|
|||
}
|
||||
}
|
||||
|
||||
/* Map a vertex index of a plain line list to the matching vertex index of a
 * line list with adjacency: line k (ids 2k and 2k+1) reads vertices 4k+1 and
 * 4k+2.
 */
uint
|
||||
libagx_map_to_line_adj(uint id)
|
||||
{
|
||||
/* Sequence (1, 2), (5, 6), (9, 10), ... */
|
||||
return ((id & ~1) * 2) + (id & 1) + 1;
|
||||
}
|
||||
|
||||
/* Map a vertex index of a plain line list to the matching vertex index of a
 * line strip with adjacency: line k (ids 2k and 2k+1) reads vertices k+1 and
 * k+2.
 */
uint
|
||||
libagx_map_to_line_strip_adj(uint id)
|
||||
{
|
||||
/* Sequence (1, 2), (2, 3), (3, 4), ... */
|
||||
uint prim = id / 2;
|
||||
uint vert = id & 1;
|
||||
return prim + vert + 1;
|
||||
}
|
||||
|
||||
/* Map a vertex index of a plain triangle list to the matching vertex index of
 * a triangle strip with adjacency. `id / 3` selects the triangle and `id % 3`
 * the corner within it.
 */
uint
|
||||
libagx_map_to_tri_strip_adj(uint id)
|
||||
{
|
||||
/* Sequence (0, 2, 4), (2, 6, 4), (4, 6, 8), (6, 10, 8)
|
||||
*
|
||||
* Although tri strips with adjacency have 6 cases in general, after
|
||||
* disregarding the vertices only available in a geometry shader, there are
|
||||
* only even/odd cases. In other words, it's just a triangle strip subject to
|
||||
* extra padding.
|
||||
*
|
||||
* Dividing through by two, the sequence is:
|
||||
*
|
||||
* (0, 1, 2), (1, 3, 2), (2, 3, 4), (3, 5, 4)
|
||||
*/
|
||||
uint prim = id / 3;
|
||||
uint vtx = id % 3;
|
||||
|
||||
/* Flip the winding order of odd triangles */
|
||||
if ((prim % 2) == 1) {
|
||||
if (vtx == 1)
|
||||
vtx = 2;
|
||||
else if (vtx == 2)
|
||||
vtx = 1;
|
||||
}
|
||||
|
||||
/* Multiply back by two to undo the "dividing through by two" above */
return 2 * (prim + vtx);
|
||||
}
|
||||
|
||||
static void
|
||||
store_index(uintptr_t index_buffer, uint index_size_B, uint id, uint value)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "compiler/libcl/libcl.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "agx_pack.h"
|
||||
|
||||
#define agx_push(ptr, T, cfg) \
|
||||
|
|
@ -496,10 +497,6 @@ struct agx_usc_builder {
|
|||
#endif
|
||||
} PACKED;
|
||||
|
||||
#ifdef __OPENCL_VERSION__
|
||||
static_assert(sizeof(struct agx_usc_builder) == 8);
|
||||
#endif
|
||||
|
||||
static struct agx_usc_builder
|
||||
agx_usc_builder(GLOBAL void *out, ASSERTED size_t size)
|
||||
{
|
||||
|
|
@ -577,3 +574,18 @@ libagx_draw_robust_index_vdm_size()
|
|||
struct agx_draw draw = agx_draw_indexed(0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
return agx_vdm_draw_size(0, draw);
|
||||
}
|
||||
|
||||
/* Given the vertex count of a draw using adjacency primitive `prim`, return
 * the vertex count of the replacement draw without adjacency:
 *
 *  - triangle strip with adjacency: 3 * ((count - 4) / 2), or 0 if count < 4
 *  - line strip with adjacency:     2 * (count - 3), or 0 if count < 3
 *  - anything else:                 count / 2
 */
static inline unsigned
|
||||
libagx_remap_adj_count(unsigned count, enum mesa_prim prim)
|
||||
{
|
||||
if (prim == MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) {
|
||||
/* Spec gives formula for # of primitives in a tri strip adj */
|
||||
/* Clamp so short draws do not underflow the unsigned subtraction */
unsigned c4 = count >= 4 ? count - 4 : 0;
|
||||
return 3 * (c4 / 2);
|
||||
} else if (prim == MESA_PRIM_LINE_STRIP_ADJACENCY) {
|
||||
return 2 * (count >= 3 ? count - 3 : 0);
|
||||
} else {
|
||||
/* Adjacency lists just drop half the vertices. */
|
||||
return count / 2;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -637,7 +637,6 @@ hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s,
|
|||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
|
||||
enum pipe_shader_type sw_stage = s->info.stage;
|
||||
enum pipe_shader_type hw_stage = s->b.info.stage;
|
||||
|
||||
unsigned constant_push_ranges = DIV_ROUND_UP(s->b.info.rodata.size_16, 64);
|
||||
unsigned push_ranges = 2;
|
||||
|
|
@ -683,7 +682,7 @@ hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s,
|
|||
if (cmd->state.gfx.draw_id_ptr)
|
||||
agx_usc_uniform(&b, (6 * count) + 4, 1, cmd->state.gfx.draw_id_ptr);
|
||||
|
||||
if (hw_stage == MESA_SHADER_COMPUTE) {
|
||||
if (linked->sw_indexing) {
|
||||
agx_usc_uniform(
|
||||
&b, (6 * count) + 8, 4,
|
||||
root_ptr + hk_root_descriptor_offset(draw.input_assembly));
|
||||
|
|
|
|||
|
|
@ -1098,11 +1098,22 @@ hk_rast_prim(struct hk_cmd_buffer *cmd)
|
|||
{
|
||||
struct hk_graphics_state *gfx = &cmd->state.gfx;
|
||||
struct hk_api_shader *gs = gfx->shaders[MESA_SHADER_GEOMETRY];
|
||||
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
|
||||
|
||||
if (gs != NULL)
|
||||
if (gs != NULL) {
|
||||
return gs->variants[HK_GS_VARIANT_RAST].info.gs.out_prim;
|
||||
else
|
||||
} else {
|
||||
switch (dyn->ia.primitive_topology) {
|
||||
case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
|
||||
case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
|
||||
return MESA_PRIM_LINES;
|
||||
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
|
||||
case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
|
||||
return MESA_PRIM_TRIANGLES;
|
||||
default:
|
||||
return hk_gs_in_prim(cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
|
|
@ -1721,7 +1732,8 @@ hk_get_fast_linked_locked_vs(struct hk_device *dev, struct hk_shader *shader,
|
|||
ralloc_memdup(shader->linked.ht, key, sizeof(*key));
|
||||
|
||||
/* XXX: Fix this higher up the stack */
|
||||
linked->b.uses_base_param |= !key->prolog.hw;
|
||||
linked->sw_indexing = !key->prolog.hw || key->prolog.adjacency;
|
||||
linked->b.uses_base_param |= linked->sw_indexing;
|
||||
|
||||
_mesa_hash_table_insert(shader->linked.ht, key_clone, linked);
|
||||
return linked;
|
||||
|
|
@ -2545,7 +2557,17 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
.prolog.robustness.soft_fault = agx_has_soft_fault(&dev->dev),
|
||||
};
|
||||
|
||||
if (!key.prolog.hw) {
|
||||
enum mesa_prim prim = vk_conv_topology(dyn->ia.primitive_topology);
|
||||
|
||||
if (mesa_prim_has_adjacency(prim)) {
|
||||
if (draw.restart) {
|
||||
prim = u_decomposed_prim(prim);
|
||||
}
|
||||
|
||||
key.prolog.adjacency = prim;
|
||||
}
|
||||
|
||||
if (key.prolog.adjacency || !key.prolog.hw) {
|
||||
key.prolog.sw_index_size_B =
|
||||
draw.indexed ? agx_index_size_to_B(draw.index_size) : 0;
|
||||
}
|
||||
|
|
@ -2886,6 +2908,17 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
gfx->dirty |= HK_DIRTY_VARYINGS;
|
||||
}
|
||||
|
||||
if (gfx->shaders[MESA_SHADER_TESS_EVAL] ||
|
||||
gfx->shaders[MESA_SHADER_GEOMETRY] || linked_vs->sw_indexing) {
|
||||
/* XXX: We should deduplicate this logic */
|
||||
bool indirect = agx_is_indirect(draw.b) || draw.restart;
|
||||
|
||||
desc->root.draw.input_assembly =
|
||||
indirect ? hk_pool_alloc(cmd, sizeof(struct agx_ia_state), 4).gpu
|
||||
: hk_upload_ia_params(cmd, draw);
|
||||
desc->root_dirty = true;
|
||||
}
|
||||
|
||||
if (gfx->shaders[MESA_SHADER_TESS_EVAL] ||
|
||||
gfx->shaders[MESA_SHADER_GEOMETRY]) {
|
||||
|
||||
|
|
@ -2895,10 +2928,6 @@ hk_flush_dynamic_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
|
|||
/* XXX: We should deduplicate this logic */
|
||||
bool indirect = agx_is_indirect(draw.b) || draw.restart;
|
||||
|
||||
desc->root.draw.input_assembly =
|
||||
indirect ? hk_pool_alloc(cmd, sizeof(struct agx_ia_state), 4).gpu
|
||||
: hk_upload_ia_params(cmd, draw);
|
||||
|
||||
if (!indirect) {
|
||||
uint32_t verts = draw.b.count[0], instances = draw.b.count[1];
|
||||
unsigned vb_size =
|
||||
|
|
@ -3075,18 +3104,9 @@ hk_handle_passthrough_gs(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
|||
return;
|
||||
|
||||
/* Determine if we need a geometry shader to emulate XFB or adjacency */
|
||||
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
|
||||
struct hk_shader *last_sw = hk_bound_sw_vs_before_gs(gfx);
|
||||
uint32_t xfb_outputs = last_sw->info.xfb_info.output_count;
|
||||
|
||||
VkPrimitiveTopology topology = dyn->ia.primitive_topology;
|
||||
bool adjacency =
|
||||
(topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY) ||
|
||||
(topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY) ||
|
||||
(topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY) ||
|
||||
(topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY);
|
||||
|
||||
bool needs_gs = xfb_outputs || adjacency;
|
||||
bool needs_gs = xfb_outputs;
|
||||
|
||||
/* Various pipeline statistics are implemented in the pre-GS shader. TODO:
|
||||
* This could easily be optimized.
|
||||
|
|
@ -3137,9 +3157,8 @@ hk_handle_passthrough_gs(struct hk_cmd_buffer *cmd, struct agx_draw draw)
|
|||
}
|
||||
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
perf_debug(dev, "Binding passthrough GS for%s%s%s\n",
|
||||
xfb_outputs ? " XFB" : "", adjacency ? " adjacency" : "",
|
||||
ia_stats ? " statistics" : "");
|
||||
perf_debug(dev, "Binding passthrough GS for%s%s\n",
|
||||
xfb_outputs ? " XFB" : "", ia_stats ? " statistics" : "");
|
||||
|
||||
gs = hk_meta_shader(dev, hk_nir_passthrough_gs, key, key_size);
|
||||
gs->is_passthrough = true;
|
||||
|
|
@ -3376,9 +3395,15 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
|
|||
if (!cs)
|
||||
return;
|
||||
|
||||
struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
|
||||
bool geom = cmd->state.gfx.shaders[MESA_SHADER_GEOMETRY];
|
||||
bool tess = cmd->state.gfx.shaders[MESA_SHADER_TESS_EVAL];
|
||||
bool needs_idx_robust = hk_needs_index_robustness(cmd, &draw);
|
||||
bool adj =
|
||||
mesa_prim_has_adjacency(vk_conv_topology(dyn->ia.primitive_topology));
|
||||
adj &= !geom;
|
||||
needs_idx_robust &= !adj;
|
||||
|
||||
struct hk_cs *ccs = NULL;
|
||||
uint8_t *out = cs->current;
|
||||
assert(cs->current + 0x1000 < cs->end);
|
||||
|
|
@ -3388,7 +3413,9 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
|
|||
|
||||
cs->stats.calls++;
|
||||
|
||||
if (geom || tess || ia_stats || needs_idx_robust) {
|
||||
if (geom || tess || ia_stats || needs_idx_robust ||
|
||||
(adj && (agx_is_indirect(draw.b) || draw.restart))) {
|
||||
|
||||
ccs =
|
||||
hk_cmd_buffer_get_cs_general(cmd, &cmd->current_cs.pre_gfx, true);
|
||||
if (!ccs)
|
||||
|
|
@ -3415,9 +3442,42 @@ hk_draw(struct hk_cmd_buffer *cmd, uint16_t draw_id, struct agx_draw draw_)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (adj) {
|
||||
assert(!geom && "geometry shaders handle adj directly");
|
||||
enum mesa_prim prim = vk_conv_topology(dyn->ia.primitive_topology);
|
||||
|
||||
if (draw.restart) {
|
||||
draw = hk_draw_without_restart(cmd, ccs, draw, 1);
|
||||
prim = u_decomposed_prim(prim);
|
||||
}
|
||||
|
||||
if (agx_is_indirect(draw.b)) {
|
||||
const size_t size = sizeof(VkDrawIndexedIndirectCommand);
|
||||
static_assert(size > sizeof(VkDrawIndirectCommand),
|
||||
"allocation size is conservative");
|
||||
|
||||
uint64_t out_draw = hk_pool_alloc(cmd, size, 4).gpu;
|
||||
struct hk_descriptor_state *desc = &cmd->state.gfx.descriptors;
|
||||
|
||||
libagx_draw_without_adj(
|
||||
ccs, agx_1d(1), out_draw, draw.b.ptr,
|
||||
desc->root.draw.input_assembly, draw.index_buffer,
|
||||
draw.indexed ? agx_draw_index_range_el(draw) : 0,
|
||||
draw.indexed ? agx_index_size_to_B(draw.index_size) : 0, prim);
|
||||
|
||||
draw = agx_draw_indirect(out_draw);
|
||||
} else {
|
||||
unsigned count = libagx_remap_adj_count(draw.b.count[0], prim);
|
||||
|
||||
draw = (struct agx_draw){
|
||||
.b = agx_3d(count, draw.b.count[1], 1),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
enum agx_primitive topology = cmd->state.gfx.topology;
|
||||
if (needs_idx_robust) {
|
||||
assert(!geom && !tess);
|
||||
assert(!geom && !tess && !adj);
|
||||
perf_debug(dev, "lowering robust index buffer");
|
||||
|
||||
cs->current = out;
|
||||
|
|
|
|||
|
|
@ -313,6 +313,11 @@ hk_pre_gs_variant(struct hk_api_shader *obj, bool rast_disc)
|
|||
struct hk_linked_shader {
|
||||
struct agx_linked_shader b;
|
||||
|
||||
/* True if the VS prolog uses software indexing, either for geom/tess or
|
||||
* adjacency primitives.
|
||||
*/
|
||||
bool sw_indexing;
|
||||
|
||||
/* Distinct from hk_shader::counts due to addition of cf_binding_count, which
|
||||
* is delayed since it depends on cull distance.
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue