diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 52434dff42e..9f406cd3578 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -2959,7 +2959,8 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx, out->inputs_flat_shaded = masks.flat; out->inputs_linear_shaded = masks.linear; } - } else if (nir->info.stage == MESA_SHADER_VERTEX) { + } else if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL) { out->has_edgeflags = nir->info.outputs_written & VARYING_BIT_EDGE; out->cull_distance_size = nir->info.cull_distance_array_size; diff --git a/src/asahi/compiler/agx_nir_lower_cull_distance.c b/src/asahi/compiler/agx_nir_lower_cull_distance.c index 8c7734e3342..58c15730bb2 100644 --- a/src/asahi/compiler/agx_nir_lower_cull_distance.c +++ b/src/asahi/compiler/agx_nir_lower_cull_distance.c @@ -9,6 +9,7 @@ #include "agx_compile.h" #include "agx_nir.h" #include "glsl_types.h" +#include "shader_enums.h" /* * Lower cull distance to discard. 
From the spec: @@ -61,7 +62,9 @@ lower_write(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data) bool agx_nir_lower_cull_distance_vs(nir_shader *s) { - assert(s->info.stage == MESA_SHADER_VERTEX); + assert(s->info.stage == MESA_SHADER_VERTEX || + s->info.stage == MESA_SHADER_TESS_EVAL); + assert(s->info.outputs_written & VARYING_BIT_CULL_DIST0); nir_shader_intrinsics_pass( diff --git a/src/asahi/lib/agx_nir_lower_gs.c b/src/asahi/lib/agx_nir_lower_gs.c index 238126e2492..fe8b3f64083 100644 --- a/src/asahi/lib/agx_nir_lower_gs.c +++ b/src/asahi/lib/agx_nir_lower_gs.c @@ -131,15 +131,10 @@ add_counter(nir_builder *b, nir_def *counter, nir_def *increment) } /* Helpers for lowering I/O to variables */ -struct lower_output_to_var_state { - nir_variable *outputs[NUM_TOTAL_VARYING_SLOTS]; - bool arrayed; -}; - -static bool -lower_output_to_var(nir_builder *b, nir_instr *instr, void *data) +bool +agx_lower_output_to_var(nir_builder *b, nir_instr *instr, void *data) { - struct lower_output_to_var_state *state = data; + struct agx_lower_output_to_var_state *state = data; if (instr->type != nir_instr_type_intrinsic) return false; @@ -201,7 +196,7 @@ load_instance_id(nir_builder *b) static bool lower_gs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data) { - struct lower_output_to_var_state *vs_state = data; + struct agx_lower_output_to_var_state *vs_state = data; if (intr->intrinsic != nir_intrinsic_load_per_vertex_input) return false; @@ -249,7 +244,7 @@ lower_id_in_prim(nir_builder *b, nir_instr *instr, void *data) static void agx_nir_link_vs_gs(nir_shader *vs, nir_shader *gs) { - struct lower_output_to_var_state state = {.arrayed = true}; + struct agx_lower_output_to_var_state state = {.arrayed = true}; /* Vertex shader outputs will be placed in arrays. Create those arrays. 
*/ u_foreach_bit64(slot, vs->info.outputs_written) { @@ -278,7 +273,7 @@ agx_nir_link_vs_gs(nir_shader *vs, nir_shader *gs) /* The vertex shader needs to be expressed in terms of that index */ nir_function_instructions_pass( - vs_function->impl, lower_output_to_var, + vs_function->impl, agx_lower_output_to_var, nir_metadata_block_index | nir_metadata_dominance, &state); nir_function_instructions_pass( @@ -1144,7 +1139,7 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx, *gs_count = NULL; /* Geometry shader outputs are staged to temporaries */ - struct lower_output_to_var_state state = {.arrayed = false}; + struct agx_lower_output_to_var_state state = {.arrayed = false}; u_foreach_bit64(slot, gs->info.outputs_written) { const char *slot_name = @@ -1165,7 +1160,7 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx, gs_state.stride_B += size_B; } - NIR_PASS(_, gs, nir_shader_instructions_pass, lower_output_to_var, + NIR_PASS(_, gs, nir_shader_instructions_pass, agx_lower_output_to_var, nir_metadata_block_index | nir_metadata_dominance, &state); /* Set flatshade_first. 
For now this is always a constant, but in the future diff --git a/src/asahi/lib/agx_nir_lower_gs.h b/src/asahi/lib/agx_nir_lower_gs.h index 3925f07e5da..e2799c748e2 100644 --- a/src/asahi/lib/agx_nir_lower_gs.h +++ b/src/asahi/lib/agx_nir_lower_gs.h @@ -7,11 +7,25 @@ #define __AGX_NIR_LOWER_GS_H #include +#include +#include "shader_enums.h" struct nir_shader; struct agx_ia_key; enum mesa_prim; +struct nir_instr; +struct nir_builder; +struct nir_variable; + +struct agx_lower_output_to_var_state { + struct nir_variable *outputs[NUM_TOTAL_VARYING_SLOTS]; + bool arrayed; +}; + +bool agx_lower_output_to_var(struct nir_builder *b, struct nir_instr *instr, + void *data); + bool agx_nir_lower_ia(struct nir_shader *s, struct agx_ia_key *ia); bool agx_nir_lower_multidraw(struct nir_shader *s, struct agx_ia_key *key); @@ -33,4 +47,13 @@ struct nir_shader *agx_nir_unroll_restart(const struct nir_shader *libagx, enum mesa_prim prim, unsigned index_size_B); +bool agx_nir_lower_tcs(struct nir_shader *tcs, const struct nir_shader *vs, + const struct nir_shader *libagx, uint8_t index_size_B); + +bool agx_nir_lower_tes(struct nir_shader *tes, const struct nir_shader *libagx); + +uint64_t agx_tcs_per_vertex_outputs(const struct nir_shader *nir); + +unsigned agx_tcs_output_stride(const struct nir_shader *nir); + #endif diff --git a/src/asahi/lib/agx_nir_lower_ia.c b/src/asahi/lib/agx_nir_lower_ia.c index c14e59191e3..2c97bab1fb0 100644 --- a/src/asahi/lib/agx_nir_lower_ia.c +++ b/src/asahi/lib/agx_nir_lower_ia.c @@ -12,6 +12,7 @@ #include "nir.h" #include "nir_builder_opcodes.h" #include "nir_intrinsics.h" +#include "shader_enums.h" /* * This file implements input assembly in software for geometry/tessellation @@ -27,14 +28,60 @@ * This multidraw implementation kicks off the prefix sum and lowered draw. */ +/* + * Sync with geometry.cl, this is preferred to avoid NIR needing to chew through + * the massive switch statement (bad for compile time). 
+ */ +static nir_def * +vertex_id_for_topology(nir_builder *b, struct agx_ia_key *key) +{ + nir_def *prim = nir_load_primitive_id(b); + nir_def *vert = nir_load_vertex_id_in_primitive_agx(b); + nir_def *flatshade_first = nir_imm_bool(b, key->flatshade_first); + + switch (key->mode) { + case MESA_PRIM_POINTS: + case MESA_PRIM_LINES: + case MESA_PRIM_TRIANGLES: + case MESA_PRIM_LINES_ADJACENCY: + case MESA_PRIM_TRIANGLES_ADJACENCY: + return nir_iadd( + b, nir_imul_imm(b, prim, mesa_vertices_per_prim(key->mode)), vert); + + case MESA_PRIM_LINE_LOOP: + return libagx_vertex_id_for_line_loop(b, prim, vert, + nir_load_num_vertices(b)); + + case MESA_PRIM_LINE_STRIP: + case MESA_PRIM_LINE_STRIP_ADJACENCY: + return nir_iadd(b, prim, vert); + + case MESA_PRIM_TRIANGLE_STRIP: { + return nir_iadd( + b, prim, + libagx_map_vertex_in_tri_strip(b, prim, vert, flatshade_first)); + } + + case MESA_PRIM_TRIANGLE_FAN: + return libagx_vertex_id_for_tri_fan(b, prim, vert, flatshade_first); + + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: + return libagx_vertex_id_for_tri_strip_adj( + b, prim, vert, nir_load_num_vertices(b), flatshade_first); + + case MESA_PRIM_PATCHES: + return nir_iadd(b, nir_imul(b, prim, nir_load_patch_vertices_in(b)), + nir_load_invocation_id(b)); + + default: + unreachable("invalid mode"); + } +} + static nir_def * load_vertex_id(nir_builder *b, struct agx_ia_key *key) { - /* Tessellate by primitive mode */ - nir_def *id = libagx_vertex_id_for_topology( - b, nir_imm_int(b, key->mode), nir_imm_bool(b, key->flatshade_first), - nir_load_primitive_id(b), nir_load_vertex_id_in_primitive_agx(b), - nir_load_num_vertices(b)); + nir_def *id = vertex_id_for_topology(b, key); /* If drawing with an index buffer, pull the vertex ID. Otherwise, the * vertex ID is just the index as-is. 
diff --git a/src/asahi/lib/agx_nir_lower_tess.c b/src/asahi/lib/agx_nir_lower_tess.c new file mode 100644 index 00000000000..6381139d218 --- /dev/null +++ b/src/asahi/lib/agx_nir_lower_tess.c @@ -0,0 +1,395 @@ +/* + * Copyright 2023 Alyssa Rosenzweig + * SPDX-License-Identifier: MIT + */ + +#include "shaders/geometry.h" +#include "util/bitscan.h" +#include "util/macros.h" +#include "agx_nir_lower_gs.h" +#include "glsl_types.h" +#include "libagx_shaders.h" +#include "nir.h" +#include "nir_builder.h" +#include "nir_builder_opcodes.h" +#include "nir_intrinsics.h" +#include "nir_intrinsics_indices.h" +#include "shader_enums.h" + +struct tcs_state { + struct agx_lower_output_to_var_state vs_vars; + uint64_t vs_outputs_written; +}; + +static nir_def * +tcs_patch_id(nir_builder *b) +{ + return nir_channel(b, nir_load_workgroup_id(b), 0); +} + +static nir_def * +tcs_instance_id(nir_builder *b) +{ + return nir_channel(b, nir_load_workgroup_id(b), 1); +} + +static nir_def * +tcs_unrolled_id(nir_builder *b) +{ + nir_def *stride = nir_channel(b, nir_load_num_workgroups(b), 0); + + return nir_iadd(b, nir_imul(b, tcs_instance_id(b), stride), tcs_patch_id(b)); +} + +uint64_t +agx_tcs_per_vertex_outputs(const nir_shader *nir) +{ + return nir->info.outputs_written & + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER | + VARYING_BIT_BOUNDING_BOX0 | VARYING_BIT_BOUNDING_BOX1); +} + +unsigned +agx_tcs_output_stride(const nir_shader *nir) +{ + return libagx_tcs_out_stride(util_last_bit(nir->info.patch_outputs_written), + nir->info.tess.tcs_vertices_out, + agx_tcs_per_vertex_outputs(nir)); +} + +static nir_def * +tcs_out_addr(nir_builder *b, nir_intrinsic_instr *intr, nir_def *vertex_id) +{ + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + + nir_def *offset = nir_get_io_offset_src(intr)->ssa; + nir_def *addr = libagx_tcs_out_address( + b, nir_load_tess_param_buffer_agx(b), tcs_unrolled_id(b), vertex_id, + nir_iadd_imm(b, offset, sem.location), + nir_imm_int(b, 
util_last_bit(b->shader->info.patch_outputs_written)), + nir_imm_int(b, b->shader->info.tess.tcs_vertices_out), + nir_imm_int64(b, agx_tcs_per_vertex_outputs(b->shader))); + + addr = nir_iadd_imm(b, addr, nir_intrinsic_component(intr) * 4); + + return addr; +} + +static nir_def * +lower_tes_load(nir_builder *b, nir_intrinsic_instr *intr) +{ + gl_varying_slot location = nir_intrinsic_io_semantics(intr).location; + nir_src *offset_src = nir_get_io_offset_src(intr); + + nir_def *vertex = nir_imm_int(b, 0); + nir_def *offset = offset_src ? offset_src->ssa : nir_imm_int(b, 0); + + if (intr->intrinsic == nir_intrinsic_load_per_vertex_input) + vertex = intr->src[0].ssa; + + nir_def *addr = libagx_tes_in_address(b, nir_load_tess_param_buffer_agx(b), + nir_load_vertex_id(b), vertex, + nir_iadd_imm(b, offset, location)); + + if (nir_intrinsic_has_component(intr)) + addr = nir_iadd_imm(b, addr, nir_intrinsic_component(intr) * 4); + + return nir_load_global_constant(b, addr, 4, intr->def.num_components, + intr->def.bit_size); +} + +static nir_def * +tcs_load_input(nir_builder *b, nir_intrinsic_instr *intr, + struct tcs_state *state) +{ + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + + nir_def *off = libagx_tcs_in_offset( + b, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, sem.location), + nir_imm_int64(b, state->vs_outputs_written)); + + off = nir_iadd_imm(b, off, 4 * nir_intrinsic_component(intr)); + + return nir_load_shared(b, intr->def.num_components, 32, off); +} + +static nir_def * +lower_tcs_impl(nir_builder *b, nir_intrinsic_instr *intr, + struct tcs_state *state) +{ + switch (intr->intrinsic) { + case nir_intrinsic_barrier: + /* A patch fits in a subgroup, so the barrier is unnecessary. 
*/ + return NIR_LOWER_INSTR_PROGRESS_REPLACE; + + case nir_intrinsic_load_primitive_id: + return tcs_patch_id(b); + + case nir_intrinsic_load_instance_id: + return tcs_instance_id(b); + + case nir_intrinsic_load_invocation_id: + return nir_channel(b, nir_load_local_invocation_id(b), 0); + + case nir_intrinsic_load_per_vertex_input: + return tcs_load_input(b, intr, state); + + case nir_intrinsic_load_patch_vertices_in: + return libagx_tcs_patch_vertices_in(b, nir_load_tess_param_buffer_agx(b)); + + case nir_intrinsic_load_tess_level_outer_default: + return libagx_tess_level_outer_default(b, + nir_load_tess_param_buffer_agx(b)); + + case nir_intrinsic_load_tess_level_inner_default: + return libagx_tess_level_inner_default(b, + nir_load_tess_param_buffer_agx(b)); + + case nir_intrinsic_load_output: { + nir_def *addr = tcs_out_addr(b, intr, nir_undef(b, 1, 32)); + return nir_load_global(b, addr, 4, intr->def.num_components, + intr->def.bit_size); + } + + case nir_intrinsic_load_per_vertex_output: { + nir_def *addr = tcs_out_addr(b, intr, intr->src[0].ssa); + return nir_load_global(b, addr, 4, intr->def.num_components, + intr->def.bit_size); + } + + case nir_intrinsic_store_output: { + nir_store_global(b, tcs_out_addr(b, intr, nir_undef(b, 1, 32)), 4, + intr->src[0].ssa, nir_intrinsic_write_mask(intr)); + return NIR_LOWER_INSTR_PROGRESS_REPLACE; + } + + case nir_intrinsic_store_per_vertex_output: { + nir_store_global(b, tcs_out_addr(b, intr, intr->src[1].ssa), 4, + intr->src[0].ssa, nir_intrinsic_write_mask(intr)); + return NIR_LOWER_INSTR_PROGRESS_REPLACE; + } + + default: + return NULL; + } +} + +static bool +lower_tcs(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + b->cursor = nir_before_instr(&intr->instr); + + nir_def *repl = lower_tcs_impl(b, intr, data); + if (!repl) + return false; + + if (repl != NIR_LOWER_INSTR_PROGRESS_REPLACE) + nir_def_rewrite_uses(&intr->def, repl); + + nir_instr_remove(&intr->instr); + return true; +} + +static void 
+link_libagx(nir_shader *nir, const nir_shader *libagx) +{ + nir_link_shader_functions(nir, libagx); + NIR_PASS(_, nir, nir_inline_functions); + nir_remove_non_entrypoints(nir); + NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, 64); + NIR_PASS(_, nir, nir_opt_dce); + NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, + glsl_get_cl_type_size_align); + NIR_PASS(_, nir, nir_opt_deref); + NIR_PASS(_, nir, nir_lower_vars_to_ssa); + NIR_PASS(_, nir, nir_lower_explicit_io, + nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | + nir_var_mem_global, + nir_address_format_62bit_generic); +} + +/* + * Predicate the TCS so the merged shader works when input patch size > output + * patch size. + */ +static bool +agx_nir_predicate_tcs(nir_shader *tcs) +{ + nir_function_impl *entry = nir_shader_get_entrypoint(tcs); + nir_cf_list list; + nir_cf_extract(&list, nir_before_impl(entry), nir_after_impl(entry)); + + nir_builder b = nir_builder_at(nir_after_block(nir_start_block(entry))); + nir_def *input_vtx_id = nir_load_invocation_id(&b); + unsigned verts = tcs->info.tess.tcs_vertices_out; + + nir_push_if(&b, nir_ult_imm(&b, input_vtx_id, verts)); + { + nir_cf_reinsert(&list, b.cursor); + } + nir_pop_if(&b, NULL); + + nir_metadata_preserve(entry, nir_metadata_none); + return false; +} + +bool +agx_nir_lower_tcs(nir_shader *tcs, const nir_shader *vs, + const struct nir_shader *libagx, uint8_t index_size_B) +{ + agx_nir_predicate_tcs(tcs); + + nir_function_impl *tcs_entry = nir_shader_get_entrypoint(tcs); + + /* Link the vertex shader with the TCS. This assumes that all functions have + * been inlined in the vertex shader. 
+ */ + nir_function_impl *vs_entry = nir_shader_get_entrypoint(vs); + nir_function *vs_function = nir_function_create(tcs, "vertex"); + vs_function->impl = nir_function_impl_clone(tcs, vs_entry); + vs_function->impl->function = vs_function; + + /* Vertex shader outputs are staged to temporaries */ + struct tcs_state state = { + .vs_vars.arrayed = false, + .vs_outputs_written = vs->info.outputs_written & tcs->info.inputs_read, + }; + + u_foreach_bit64(slot, vs->info.outputs_written) { + const char *slot_name = + gl_varying_slot_name_for_stage(slot, MESA_SHADER_VERTEX); + + state.vs_vars.outputs[slot] = nir_variable_create( + tcs, nir_var_shader_temp, glsl_uvec4_type(), slot_name); + } + + nir_function_instructions_pass( + vs_function->impl, agx_lower_output_to_var, + nir_metadata_block_index | nir_metadata_dominance, &state.vs_vars); + + /* Invoke the VS first for each vertex in the input patch */ + nir_builder b_ = nir_builder_at(nir_before_impl(tcs_entry)); + nir_builder *b = &b_; + + nir_def *input_vtx_id = nir_load_invocation_id(b); + nir_push_if(b, nir_ult(b, input_vtx_id, nir_load_patch_vertices_in(b))); + { + nir_inline_function_impl(b, vs_function->impl, NULL, NULL); + + /* To handle cross-invocation VS output reads, dump everything in + * shared local memory. + * + * TODO: Optimize to registers. + */ + u_foreach_bit64(slot, state.vs_outputs_written) { + nir_def *off = + libagx_tcs_in_offset(b, input_vtx_id, nir_imm_int(b, slot), + nir_imm_int64(b, state.vs_outputs_written)); + + nir_store_shared(b, nir_load_var(b, state.vs_vars.outputs[slot]), off, + .write_mask = nir_component_mask(4)); + } + } + nir_pop_if(b, NULL); + + /* Clean up after inlining VS into TCS */ + exec_node_remove(&vs_function->node); + nir_lower_global_vars_to_local(tcs); + + /* Lower I/A. 
TODO: Indirect multidraws */ + agx_nir_lower_ia(tcs, &(struct agx_ia_key){ + .index_size = index_size_B, + .mode = MESA_PRIM_PATCHES, + }); + + /* Lower TCS outputs */ + nir_shader_intrinsics_pass(tcs, lower_tcs, + nir_metadata_block_index | nir_metadata_dominance, + &state); + link_libagx(tcs, libagx); + nir_metadata_preserve(b->impl, nir_metadata_none); + return true; +} + +static nir_def * +lower_tes_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + switch (intr->intrinsic) { + case nir_intrinsic_load_tess_coord_xy: + return libagx_load_tess_coord(b, nir_load_tess_param_buffer_agx(b), + nir_load_vertex_id(b)); + + case nir_intrinsic_load_primitive_id: + return libagx_tes_patch_id(b, nir_load_tess_param_buffer_agx(b), + nir_load_vertex_id(b)); + + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_tess_level_inner: + case nir_intrinsic_load_tess_level_outer: + return lower_tes_load(b, intr); + + case nir_intrinsic_load_patch_vertices_in: + return libagx_tes_patch_vertices_in(b, nir_load_tess_param_buffer_agx(b)); + + default: + return NULL; + } +} + +static bool +lower_tes(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + b->cursor = nir_before_instr(&intr->instr); + nir_def *repl = lower_tes_impl(b, intr, data); + + if (repl) { + nir_def_rewrite_uses(&intr->def, repl); + nir_instr_remove(&intr->instr); + return true; + } else { + return false; + } +} + +static int +glsl_type_size(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +bool +agx_nir_lower_tes(nir_shader *tes, const nir_shader *libagx) +{ + nir_lower_tess_coord_z( + tes, tes->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES); + + nir_shader_intrinsics_pass( + tes, lower_tes, nir_metadata_block_index | nir_metadata_dominance, NULL); + + /* Points mode renders as points, make sure we write point size for the HW */ + if (tes->info.tess.point_mode && + 
!(tes->info.outputs_written & VARYING_BIT_PSIZ)) { + + nir_function_impl *impl = nir_shader_get_entrypoint(tes); + nir_builder b = nir_builder_at(nir_after_impl(impl)); + + nir_store_output(&b, nir_imm_float(&b, 1.0), nir_imm_int(&b, 0), + .io_semantics.location = VARYING_SLOT_PSIZ, + .write_mask = nir_component_mask(1), .range = 1); + + tes->info.outputs_written |= VARYING_BIT_PSIZ; + } + + /* We lower to a HW VS, so update the shader info so the compiler does the + * right thing. + */ + tes->info.stage = MESA_SHADER_VERTEX; + memset(&tes->info.vs, 0, sizeof(tes->info.vs)); + tes->info.vs.tes_agx = true; + + link_libagx(tes, libagx); + nir_lower_idiv(tes, &(nir_lower_idiv_options){.allow_fp16 = true}); + nir_metadata_preserve(nir_shader_get_entrypoint(tes), nir_metadata_none); + return true; +} diff --git a/src/asahi/lib/meson.build b/src/asahi/lib/meson.build index ebfb5a83125..856bfe7a344 100644 --- a/src/asahi/lib/meson.build +++ b/src/asahi/lib/meson.build @@ -17,6 +17,7 @@ libasahi_lib_files = files( 'agx_nir_lower_ia.c', 'agx_nir_lower_msaa.c', 'agx_nir_lower_sample_intrinsics.c', + 'agx_nir_lower_tess.c', 'agx_nir_lower_tilebuffer.c', 'agx_nir_lower_vbo.c', 'agx_nir_predicate_layer_id.c', @@ -32,6 +33,8 @@ libagx_shader_files = files( 'shaders/libagx.h', 'shaders/geometry.cl', 'shaders/geometry.h', + 'shaders/tessellation.cl', + 'shaders/tessellator.cl', 'shaders/texture.cl', ) diff --git a/src/asahi/lib/shaders/geometry.cl b/src/asahi/lib/shaders/geometry.cl index 366d66846b6..d1b68b8e632 100644 --- a/src/asahi/lib/shaders/geometry.cl +++ b/src/asahi/lib/shaders/geometry.cl @@ -40,7 +40,80 @@ libagx_xfb_vertex_address(global struct agx_geometry_params *p, uint base_index, return (uintptr_t)(p->xfb_base[buffer]) + xfb_offset; } -/* TODO: Primitive restart */ +uint +libagx_vertex_id_for_line_loop(uint prim, uint vert, uint num_prims) +{ + /* (0, 1), (1, 2), (2, 0) */ + if (prim == (num_prims - 1) && vert == 1) + return 0; + else + return prim + vert; +} + 
+uint +libagx_vertex_id_for_tri_fan(uint prim, uint vert, bool flatshade_first) +{ + /* Vulkan spec section 20.1.7 gives (i + 1, i + 2, 0) for a provoking + * first. OpenGL instead wants (0, i + 1, i + 2) with a provoking last. + * Piglit clipflat expects us to switch between these orders depending on + * provoking vertex, to avoid trivializing the fan. + * + * Rotate accordingly. + */ + if (flatshade_first) + vert = (vert + 1) % 3; + + /* The simpler form assuming last is provoking. */ + return (vert == 0) ? 0 : prim + vert; +} + +uint +libagx_vertex_id_for_tri_strip_adj(uint prim, uint vert, uint num_prims, + bool flatshade_first) +{ + /* See Vulkan spec section 20.1.11 "Triangle Strips With Adjancency". + * + * There are different cases for first/middle/last/only primitives and for + * odd/even primitives. Determine which case we're in. + */ + bool last = prim == (num_prims - 1); + bool first = prim == 0; + bool even = (prim & 1) == 0; + bool even_or_first = even || first; + + /* When the last vertex is provoking, we rotate the primitives + * accordingly. This seems required for OpenGL. + */ + if (!flatshade_first && !even_or_first) { + vert = (vert + 4u) % 6u; + } + + /* Offsets per the spec. The spec lists 6 cases with 6 offsets. Luckily, + * there are lots of patterns we can exploit, avoiding a full 6x6 LUT. + * + * Here we assume the first vertex is provoking, the Vulkan default. + */ + uint offsets[6] = { + 0, + first ? 1 : (even ? -2 : 3), + even_or_first ? 2 : 4, + last ? 5 : 6, + even_or_first ? 4 : 2, + even_or_first ? 
3 : -2, + }; + + /* Ensure NIR can see thru the local array */ + uint offset = 0; + for (uint i = 1; i < 6; ++i) { + if (i == vert) + offset = offsets[i]; + } + + /* Finally add to the base of the primitive */ + return (prim * 2) + offset; +} + +/* Sync with agx_nir_lower_ia.c, this is for the restart unrolling */ uint libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, uint prim, uint vert, uint num_prims) @@ -50,24 +123,17 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, case MESA_PRIM_LINES: case MESA_PRIM_TRIANGLES: case MESA_PRIM_LINES_ADJACENCY: - case MESA_PRIM_TRIANGLES_ADJACENCY: { + case MESA_PRIM_TRIANGLES_ADJACENCY: /* Regular primitive: every N vertices defines a primitive */ return (prim * mesa_vertices_per_prim(mode)) + vert; - } - case MESA_PRIM_LINE_LOOP: { - /* (0, 1), (1, 2), (2, 0) */ - if (prim == (num_prims - 1) && vert == 1) - return 0; - else - return prim + vert; - } + case MESA_PRIM_LINE_LOOP: + return libagx_vertex_id_for_line_loop(prim, vert, num_prims); case MESA_PRIM_LINE_STRIP: - case MESA_PRIM_LINE_STRIP_ADJACENCY: { + case MESA_PRIM_LINE_STRIP_ADJACENCY: /* (i, i + 1) or (i, ..., i + 3) */ return prim + vert; - } case MESA_PRIM_TRIANGLE_STRIP: { /* Order depends on the provoking vert. @@ -80,66 +146,14 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, return prim + libagx_map_vertex_in_tri_strip(prim, vert, flatshade_first); } - case MESA_PRIM_TRIANGLE_FAN: { - /* Vulkan spec section 20.1.7 gives (i + 1, i + 2, 0) for a provoking - * first. OpenGL instead wants (0, i + 1, i + 2) with a provoking last. - * Piglit clipflat expects us to switch between these orders depending on - * provoking vertex, to avoid trivializing the fan. - * - * Rotate accordingly. - */ - if (flatshade_first) - vert = (vert + 1) % 3; + case MESA_PRIM_TRIANGLE_FAN: + return libagx_vertex_id_for_tri_fan(prim, vert, flatshade_first); - /* The simpler form assuming last is provoking. 
*/ - return (vert == 0) ? 0 : prim + vert; - } - - case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: { - /* See Vulkan spec section 20.1.11 "Triangle Strips With Adjancency". - * - * There are different cases for first/middle/last/only primitives and for - * odd/even primitives. Determine which case we're in. - */ - bool last = prim == (num_prims - 1); - bool first = prim == 0; - bool even = (prim & 1) == 0; - bool even_or_first = even || first; - - /* When the last vertex is provoking, we rotate the primitives - * accordingly. This seems required for OpenGL. - */ - if (!flatshade_first && !even_or_first) { - vert = (vert + 4u) % 6u; - } - - /* Offsets per the spec. The spec lists 6 cases with 6 offsets. Luckily, - * there are lots of patterns we can exploit, avoiding a full 6x6 LUT. - * - * Here we assume the first vertex is provoking, the Vulkan default. - */ - uint offsets[6] = { - 0, - first ? 1 : (even ? -2 : 3), - even_or_first ? 2 : 4, - last ? 5 : 6, - even_or_first ? 4 : 2, - even_or_first ? 
3 : -2, - }; - - /* Ensure NIR can see thru the local array */ - uint offset = 0; - for (uint i = 1; i < 6; ++i) { - if (i == vert) - offset = offsets[i]; - } - - /* Finally add to the base of the primitive */ - return (prim * 2) + offset; - } + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: + return libagx_vertex_id_for_tri_strip_adj(prim, vert, num_prims, + flatshade_first); default: - /* Invalid */ return 0; } } diff --git a/src/asahi/lib/shaders/geometry.h b/src/asahi/lib/shaders/geometry.h index fe7e220d567..b2d5dc54dbe 100644 --- a/src/asahi/lib/shaders/geometry.h +++ b/src/asahi/lib/shaders/geometry.h @@ -8,13 +8,16 @@ #include "libagx.h" #ifndef __OPENCL_VERSION__ +#include "util/bitscan.h" #include "util/macros.h" -#define GLOBAL(type_) uint64_t -#define CONST(type_) uint64_t +#define GLOBAL(type_) uint64_t +#define CONST(type_) uint64_t +#define libagx_popcount(x) util_bitcount64(x) #else #define PACKED -#define GLOBAL(type_) global type_ * -#define CONST(type_) constant type_ * +#define GLOBAL(type_) global type_ * +#define CONST(type_) constant type_ * +#define libagx_popcount(x) popcount(x) #endif #ifndef LIBAGX_GEOMETRY_H @@ -156,4 +159,116 @@ struct agx_geometry_params { uint32_t count_buffer_stride; } PACKED; +struct agx_tess_params { + /* Persistent (cross-draw) geometry state */ + GLOBAL(struct agx_geometry_state) state; + + /* Patch coordinate offsets in patch_coord_buffer, indexed by patch ID. */ + GLOBAL(uint) patch_coord_offs; + + /* Patch coordinate buffer, indexed as: + * + * patch_coord_offs[patch_ID] + vertex_in_patch + * + * Currently float2s, but we might be able to compact later? + */ + GLOBAL(float2) patch_coord_buffer; + + /* Tessellation control shader output buffer, indexed by patch ID. */ + GLOBAL(uchar) tcs_buffer; + + /* Bitfield of TCS per-vertex outputs */ + uint64_t tcs_per_vertex_outputs; + + /* Default tess levels used in OpenGL when there is no TCS in the pipeline. + * Unused in Vulkan and OpenGL ES. 
+ */ + float tess_level_outer_default[4]; + float tess_level_inner_default[4]; + + /* Number of vertices in the input patch */ + uint input_patch_size; + + /* Number of vertices in the TCS output patch */ + uint output_patch_size; + + /* Number of patch constants written by TCS */ + uint tcs_patch_constants; + + /* Number of input patches per instance of the VS/TCS */ + uint patches_per_instance; +} PACKED; + +/* TCS shared memory layout: + * + * vec4 vs_outputs[VERTICES_IN_INPUT_PATCH][TOTAL_VERTEX_OUTPUTS]; + * + * TODO: compact. + */ +static inline ushort +libagx_tcs_in_offs(uint vtx, gl_varying_slot location, + uint64_t crosslane_vs_out_mask) +{ + uint base = vtx * libagx_popcount(crosslane_vs_out_mask); + uint offs = libagx_popcount(crosslane_vs_out_mask & + (((uint64_t)(1) << location) - 1)); + + return (base + offs) * 16; +} + +static inline uint +libagx_tcs_in_size(uint32_t vertices_in_patch, uint64_t crosslane_vs_out_mask) +{ + return libagx_tcs_in_offs(vertices_in_patch - 1, VARYING_SLOT_VAR31, + crosslane_vs_out_mask); +} + +/* + * TCS out buffer layout, per-patch: + * + * float tess_level_outer[4]; + * float tess_level_inner[2]; + * vec4 patch_out[MAX_PATCH_OUTPUTS]; + * vec4 vtx_out[OUT_PATCH_SIZE][TOTAL_VERTEX_OUTPUTS]; + * + * Vertex out are compacted based on the mask of written out. Patch + * out are used as-is. + * + * Bounding boxes are ignored. 
+ */ +static inline uint +libagx_tcs_out_offs(uint vtx_id, gl_varying_slot location, uint nr_patch_out, + uint out_patch_size, uint64_t vtx_out_mask) +{ + uint off = 0; + if (location == VARYING_SLOT_TESS_LEVEL_OUTER) + return off; + + off += 4 * sizeof(float); + if (location == VARYING_SLOT_TESS_LEVEL_INNER) + return off; + + off += 2 * sizeof(float); + if (location >= VARYING_SLOT_PATCH0) + return off + (16 * (location - VARYING_SLOT_PATCH0)); + + /* Anything else is a per-vtx output */ + off += 16 * nr_patch_out; + off += 16 * vtx_id * libagx_popcount(vtx_out_mask); + + uint idx = libagx_popcount(vtx_out_mask & (((uint64_t)(1) << location) - 1)); + return off + (16 * idx); +} + +static inline uint +libagx_tcs_out_stride(uint nr_patch_out, uint out_patch_size, + uint64_t vtx_out_mask) +{ + return libagx_tcs_out_offs(out_patch_size, VARYING_SLOT_VAR0, nr_patch_out, + out_patch_size, vtx_out_mask); +} + +/* In a tess eval shader, stride for hw vertex ID */ +#define LIBAGX_TES_PATCH_ID_STRIDE 8192 + #endif diff --git a/src/asahi/lib/shaders/tessellation.cl b/src/asahi/lib/shaders/tessellation.cl new file mode 100644 index 00000000000..c4d549b9ff7 --- /dev/null +++ b/src/asahi/lib/shaders/tessellation.cl @@ -0,0 +1,92 @@ +/* + * Copyright 2023 Alyssa Rosenzweig + * SPDX-License-Identifier: MIT + */ + +#include "geometry.h" + +uint +libagx_tcs_patch_vertices_in(constant struct agx_tess_params *p) +{ + return p->input_patch_size; +} + +uint +libagx_tes_patch_vertices_in(constant struct agx_tess_params *p) +{ + return p->output_patch_size; +} + +ushort +libagx_tcs_in_offset(uint vtx, gl_varying_slot location, + uint64_t crosslane_vs_out_mask) +{ + return libagx_tcs_in_offs(vtx, location, crosslane_vs_out_mask); +} + +uintptr_t +libagx_tcs_out_address(constant struct agx_tess_params *p, uint patch_id, + uint vtx_id, gl_varying_slot location, uint nr_patch_out, + uint out_patch_size, uint64_t vtx_out_mask) +{ + uint stride = + libagx_tcs_out_stride(nr_patch_out, 
out_patch_size, vtx_out_mask); + + uint offs = libagx_tcs_out_offs(vtx_id, location, nr_patch_out, + out_patch_size, vtx_out_mask); + + return (uintptr_t)(p->tcs_buffer) + (patch_id * stride) + offs; +} + +static uint +libagx_tes_unrolled_patch_id(uint raw_id) +{ + return raw_id / LIBAGX_TES_PATCH_ID_STRIDE; +} + +uint +libagx_tes_patch_id(constant struct agx_tess_params *p, uint raw_id) +{ + return libagx_tes_unrolled_patch_id(raw_id) % p->patches_per_instance; +} + +static uint +tes_vertex_id_in_patch(uint raw_id) +{ + return raw_id % LIBAGX_TES_PATCH_ID_STRIDE; +} + +float2 +libagx_load_tess_coord(constant struct agx_tess_params *p, uint raw_id) +{ + uint patch = libagx_tes_unrolled_patch_id(raw_id); + uint vtx = tes_vertex_id_in_patch(raw_id); + + return p->patch_coord_buffer[p->patch_coord_offs[patch] + vtx]; +} + +uintptr_t +libagx_tes_in_address(constant struct agx_tess_params *p, uint raw_id, + uint vtx_id, gl_varying_slot location) +{ + uint patch = libagx_tes_unrolled_patch_id(raw_id); + + return libagx_tcs_out_address(p, patch, vtx_id, location, + p->tcs_patch_constants, p->output_patch_size, + p->tcs_per_vertex_outputs); +} + +float4 +libagx_tess_level_outer_default(constant struct agx_tess_params *p) +{ + return ( + float4)(p->tess_level_outer_default[0], p->tess_level_outer_default[1], + p->tess_level_outer_default[2], p->tess_level_outer_default[3]); +} + +float2 +libagx_tess_level_inner_default(constant struct agx_tess_params *p) +{ + return (float2)(p->tess_level_inner_default[0], + p->tess_level_inner_default[1]); +} diff --git a/src/asahi/lib/shaders/tessellator.cl b/src/asahi/lib/shaders/tessellator.cl new file mode 100644 index 00000000000..0a1fe63e66a --- /dev/null +++ b/src/asahi/lib/shaders/tessellator.cl @@ -0,0 +1,8 @@ +/* + * Copyright 2023 Alyssa Rosenzweig + * Copyright (c) Microsoft Corporation + * SPDX-License-Identifier: MIT + */ + +#include "geometry.h" + diff --git a/src/compiler/nir/nir_intrinsics.py 
b/src/compiler/nir/nir_intrinsics.py index 2955e88882b..4a676f06145 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1899,6 +1899,9 @@ system_value("input_assembly_buffer_agx", 1, bit_sizes=[64]) # Address of the parameter buffer for AGX geometry shaders system_value("geometry_param_buffer_agx", 1, bit_sizes=[64]) +# Address of the parameter buffer for AGX tessellation shaders +system_value("tess_param_buffer_agx", 1, bit_sizes=[64]) + # Loads the vertex index within the current decomposed primitive. For a # triangle, this will be in [0, 2], where 2 is the last vertex. This is defined # only when the vertex shader is reinvoked for the same vertex in each diff --git a/src/gallium/drivers/asahi/agx_blit.c b/src/gallium/drivers/asahi/agx_blit.c index 7f6cccf171f..ca1e9d24af8 100644 --- a/src/gallium/drivers/asahi/agx_blit.c +++ b/src/gallium/drivers/asahi/agx_blit.c @@ -323,6 +323,10 @@ agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter, util_blitter_save_vertex_elements(blitter, ctx->attributes); util_blitter_save_vertex_shader(blitter, ctx->stage[PIPE_SHADER_VERTEX].shader); + util_blitter_save_tessctrl_shader(blitter, + ctx->stage[PIPE_SHADER_TESS_CTRL].shader); + util_blitter_save_tesseval_shader(blitter, + ctx->stage[PIPE_SHADER_TESS_EVAL].shader); util_blitter_save_geometry_shader(blitter, ctx->stage[PIPE_SHADER_GEOMETRY].shader); util_blitter_save_rasterizer(blitter, ctx->rast); diff --git a/src/gallium/drivers/asahi/agx_disk_cache.c b/src/gallium/drivers/asahi/agx_disk_cache.c index e33a819098c..dc578a21754 100644 --- a/src/gallium/drivers/asahi/agx_disk_cache.c +++ b/src/gallium/drivers/asahi/agx_disk_cache.c @@ -37,6 +37,8 @@ agx_disk_cache_compute_key(struct disk_cache *cache, key_size = sizeof(shader_key->vs); else if (uncompiled->type == PIPE_SHADER_GEOMETRY) key_size = sizeof(shader_key->gs); + else if (uncompiled->type == PIPE_SHADER_TESS_CTRL) + key_size = sizeof(shader_key->tcs); else 
if (uncompiled->type == PIPE_SHADER_FRAGMENT) key_size = sizeof(shader_key->fs); else if (uncompiled->type == PIPE_SHADER_COMPUTE) @@ -68,8 +70,9 @@ agx_disk_cache_store(struct disk_cache *cache, if (!cache) return; - /* TODO: Support caching GS */ - if (uncompiled->type == PIPE_SHADER_GEOMETRY) + /* TODO: Support caching GS/TCS */ + if (uncompiled->type == PIPE_SHADER_GEOMETRY || + uncompiled->type == PIPE_SHADER_TESS_CTRL) return; assert(binary->bo->ptr.cpu != NULL && "shaders must be CPU mapped"); diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c index 0fcaf870ddb..53800dc6986 100644 --- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c +++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c @@ -11,6 +11,7 @@ #include "nir_builder_opcodes.h" #include "nir_intrinsics.h" #include "nir_intrinsics_indices.h" +#include "shader_enums.h" #define AGX_TEXTURE_DESC_STRIDE 24 @@ -89,8 +90,12 @@ load_sysval_indirect(nir_builder *b, unsigned dim, unsigned bitsize, static unsigned stage_table(nir_builder *b) { - assert(b->shader->info.stage < PIPE_SHADER_TYPES); - return AGX_SYSVAL_STAGE(b->shader->info.stage); + gl_shader_stage stage = b->shader->info.stage; + if (stage == MESA_SHADER_VERTEX && b->shader->info.vs.tes_agx) + stage = MESA_SHADER_TESS_EVAL; + + assert(stage < PIPE_SHADER_TYPES); + return AGX_SYSVAL_STAGE(stage); } static nir_def * @@ -161,6 +166,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, return load_sysval_root(b, 1, 64, &u->input_assembly); case nir_intrinsic_load_geometry_param_buffer_agx: return load_sysval_root(b, 1, 64, &u->geometry_params); + case nir_intrinsic_load_tess_param_buffer_agx: + return load_sysval_root(b, 1, 64, &u->tess_params); case nir_intrinsic_load_fixed_point_size_agx: return load_sysval_root(b, 1, 32, &u->fixed_point_size); case nir_intrinsic_load_tex_sprite_mask_agx: diff --git a/src/gallium/drivers/asahi/agx_pipe.c 
b/src/gallium/drivers/asahi/agx_pipe.c
index 989927e6ccb..9a1902a6e90 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -46,6 +46,7 @@
 #include "agx_public.h"
 #include "agx_state.h"
 #include "agx_tilebuffer.h"
+#include "shader_enums.h"
 
 /* Fake values, pending UAPI upstreaming */
 #ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED
@@ -1533,6 +1534,7 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_FS_FINE_DERIVATIVE:
    case PIPE_CAP_CULL_DISTANCE_NOCOMBINE:
    case PIPE_CAP_NIR_COMPACT_ARRAYS:
+   case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
       return 1;
 
    case PIPE_CAP_CLIP_HALFZ:
@@ -1699,7 +1701,10 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
       return 4;
 
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+      return 32;
    case PIPE_CAP_MAX_VARYINGS:
+      /* TODO: Probably should bump to 32? */
       return 16;
 
    case PIPE_CAP_FLATSHADE:
@@ -1726,7 +1731,8 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
             BITFIELD_BIT(MESA_PRIM_LINES_ADJACENCY) |
             BITFIELD_BIT(MESA_PRIM_LINE_STRIP_ADJACENCY) |
             BITFIELD_BIT(MESA_PRIM_TRIANGLES_ADJACENCY) |
-            BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP_ADJACENCY);
+            BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) |
+            BITFIELD_BIT(MESA_PRIM_PATCHES);
 
    case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
       return 1;
@@ -1790,6 +1796,8 @@ agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
    case PIPE_SHADER_COMPUTE:
    case PIPE_SHADER_GEOMETRY:
+   case PIPE_SHADER_TESS_CTRL:
+   case PIPE_SHADER_TESS_EVAL:
       break;
    default:
       return false;
    }
@@ -1836,7 +1844,11 @@ agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
       return 1;
 
    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+      return shader == PIPE_SHADER_TESS_CTRL || shader == PIPE_SHADER_TESS_EVAL;
+
    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+      return shader == PIPE_SHADER_TESS_CTRL;
+
    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
    case 
PIPE_SHADER_CAP_SUBROUTINES: case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 95003d44b33..0234010ff22 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -31,6 +31,8 @@ #include "pipe/p_defines.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" +#include "tessellator/p_tessellator.h" +#include "util/bitscan.h" #include "util/bitset.h" #include "util/blend.h" #include "util/blob.h" @@ -47,6 +49,7 @@ #include "util/u_resource.h" #include "util/u_transfer.h" #include "util/u_upload_mgr.h" +#include "agx_bo.h" #include "agx_device.h" #include "agx_disk_cache.h" #include "agx_nir_lower_gs.h" @@ -185,6 +188,24 @@ agx_set_blend_color(struct pipe_context *pctx, ctx->dirty |= AGX_DIRTY_BLEND_COLOR; } +static void +agx_set_patch_vertices(struct pipe_context *pctx, unsigned char n) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->patch_vertices = n; +} + +static void +agx_set_tess_state(struct pipe_context *pctx, + const float default_outer_level[4], + const float default_inner_level[2]) +{ + struct agx_context *ctx = agx_context(pctx); + + memcpy(ctx->default_outer_level, default_outer_level, 4 * sizeof(float)); + memcpy(ctx->default_inner_level, default_inner_level, 2 * sizeof(float)); +} + static void * agx_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) @@ -586,6 +607,7 @@ static enum pipe_shader_type merged_stage(struct agx_context *ctx, enum pipe_shader_type stage) { switch (stage) { + case MESA_SHADER_VERTEX: case MESA_SHADER_GEOMETRY: return ctx->stage[PIPE_SHADER_TESS_EVAL].shader ? 
MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; @@ -1504,6 +1526,18 @@ asahi_fs_shader_key_equal(const void *a, const void *b) return memcmp(a, b, sizeof(struct asahi_fs_shader_key)) == 0; } +static uint32_t +asahi_tcs_shader_key_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct asahi_tcs_shader_key)); +} + +static bool +asahi_tcs_shader_key_equal(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct asahi_tcs_shader_key)) == 0; +} + /* No compute variants */ static uint32_t asahi_cs_shader_key_hash(const void *key) @@ -1837,6 +1871,22 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, NIR_PASS(_, nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1, nir_metadata_block_index | nir_metadata_dominance, NULL); } + } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { + struct asahi_tcs_shader_key *key = &key_->tcs; + + /* TODO: Deduplicate this logic from the GS case! */ + struct blob_reader vs_reader; + blob_reader_init(&vs_reader, linked_so->serialized_nir.data, + linked_so->serialized_nir.size); + nir_shader *vs = nir_deserialize(NULL, &agx_nir_options, &vs_reader); + + /* Apply the VS key to the VS before linking it in */ + NIR_PASS_V(vs, agx_nir_lower_vbo, key->attribs); + NIR_PASS_V(vs, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL); + NIR_PASS_V(vs, agx_nir_lower_sysvals, false); + + NIR_PASS_V(nir, agx_nir_lower_tcs, vs, dev->libagx, key->index_size_B); + ralloc_free(vs); } else if (nir->info.stage == MESA_SHADER_GEOMETRY) { struct asahi_gs_shader_key *key = &key_->gs; @@ -2040,6 +2090,8 @@ agx_get_shader_variant(struct agx_screen *screen, struct pipe_context *pctx, memcpy(cloned_key, key, sizeof(struct asahi_vs_shader_key)); } else if (so->type == PIPE_SHADER_GEOMETRY) { memcpy(cloned_key, key, sizeof(struct asahi_gs_shader_key)); + } else if (so->type == PIPE_SHADER_TESS_CTRL) { + memcpy(cloned_key, key, sizeof(struct asahi_tcs_shader_key)); } else { 
assert(gl_shader_stage_is_compute(so->type)); /* No key */ @@ -2057,8 +2109,6 @@ agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so, if (nir->info.stage == MESA_SHADER_KERNEL) nir->info.stage = MESA_SHADER_COMPUTE; - so->type = pipe_shader_type_from_mesa(nir->info.stage); - blob_init(&so->early_serialized_nir); nir_serialize(&so->early_serialized_nir, nir, true); @@ -2103,11 +2153,16 @@ agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so, nir_metadata_block_index | nir_metadata_dominance, NULL); } + if (nir->info.stage == MESA_SHADER_TESS_EVAL) { + NIR_PASS(_, nir, agx_nir_lower_tes, dev->libagx); + } + blob_init(&so->serialized_nir); nir_serialize(&so->serialized_nir, nir, true); _mesa_sha1_compute(so->serialized_nir.data, so->serialized_nir.size, so->nir_sha1); + so->type = pipe_shader_type_from_mesa(nir->info.stage); so->has_xfb_info = (nir->xfb_info != NULL); static_assert( @@ -2147,11 +2202,34 @@ agx_create_shader_state(struct pipe_context *pctx, } else if (nir->info.stage == MESA_SHADER_GEOMETRY) { so->variants = _mesa_hash_table_create(NULL, asahi_gs_shader_key_hash, asahi_gs_shader_key_equal); + + } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { + /* No variants */ + so->variants = _mesa_hash_table_create(NULL, asahi_cs_shader_key_hash, + asahi_cs_shader_key_equal); + } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { + so->variants = _mesa_hash_table_create(NULL, asahi_tcs_shader_key_hash, + asahi_tcs_shader_key_equal); } else { so->variants = _mesa_hash_table_create(so, asahi_fs_shader_key_hash, asahi_fs_shader_key_equal); } + if (nir->info.stage == MESA_SHADER_TESS_EVAL || + nir->info.stage == MESA_SHADER_TESS_CTRL) { + + so->tess.ccw = nir->info.tess.ccw; + so->tess.point_mode = nir->info.tess.point_mode; + so->tess.spacing = nir->info.tess.spacing; + so->tess.output_patch_size = nir->info.tess.tcs_vertices_out; + so->tess.primitive = nir->info.tess._primitive_mode; + 
so->tess.per_vertex_outputs = agx_tcs_per_vertex_outputs(nir); + so->tess.nr_patch_outputs = + util_last_bit(nir->info.patch_outputs_written); + if (nir->info.stage == MESA_SHADER_TESS_CTRL) + so->tess.output_stride = agx_tcs_output_stride(nir); + } + agx_shader_initialize(dev, so, nir, ctx->support_lod_bias); /* We're done with the NIR, throw it away */ @@ -2178,7 +2256,9 @@ agx_create_shader_state(struct pipe_context *pctx, } case PIPE_SHADER_GEOMETRY: - /* TODO: Geometry shaders with shader-db */ + case PIPE_SHADER_TESS_CTRL: + case PIPE_SHADER_TESS_EVAL: + /* TODO: Geometry/tessellation shaders with shader-db */ return so; case PIPE_SHADER_FRAGMENT: @@ -2276,7 +2356,7 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out, } struct agx_uncompiled_shader *linked_so = NULL; - if (stage == PIPE_SHADER_GEOMETRY) + if (stage == PIPE_SHADER_TESS_CTRL || stage == PIPE_SHADER_GEOMETRY) linked_so = ctx->stage[PIPE_SHADER_VERTEX].shader; struct agx_screen *screen = agx_screen(ctx->base.screen); @@ -2351,6 +2431,30 @@ translate_ia_mode(enum mesa_prim prim) } } +static bool +agx_update_tcs(struct agx_context *ctx, const struct pipe_draw_info *info) +{ + assert(info->mode == MESA_PRIM_PATCHES); + + /* We don't bother to dirty track yet, update! */ + struct asahi_tcs_shader_key key = { + .index_size_B = info->index_size, + }; + + memcpy(key.attribs, ctx->attributes, + sizeof(key.attribs[0]) * AGX_MAX_ATTRIBS); + + static_assert(sizeof(key.input_nir_sha1) == + sizeof(ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1), + "common size for shader sha-1"); + + memcpy(key.input_nir_sha1, ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1, + sizeof(key.input_nir_sha1)); + + return agx_update_shader(ctx, &ctx->tcs, PIPE_SHADER_TESS_CTRL, + (union asahi_shader_key *)&key); +} + /* * Triangle strips and fans are rotated based on the provoking vertex, but other * primitive types are not and do not need to know the provoking vertex. 
@@ -2487,6 +2591,18 @@ agx_bind_gs_state(struct pipe_context *pctx, void *cso) agx_bind_shader_state(pctx, cso, PIPE_SHADER_GEOMETRY); } +static void +agx_bind_tcs_state(struct pipe_context *pctx, void *cso) +{ + agx_bind_shader_state(pctx, cso, PIPE_SHADER_TESS_CTRL); +} + +static void +agx_bind_tes_state(struct pipe_context *pctx, void *cso) +{ + agx_bind_shader_state(pctx, cso, PIPE_SHADER_TESS_EVAL); +} + static void agx_bind_cs_state(struct pipe_context *pctx, void *cso) { @@ -2850,7 +2966,7 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs, if (stage == PIPE_SHADER_FRAGMENT) { agx_usc_tilebuffer(&b, &batch->tilebuffer_layout); - } else if (stage == PIPE_SHADER_COMPUTE) { + } else if (stage == PIPE_SHADER_COMPUTE || stage == PIPE_SHADER_TESS_CTRL) { unsigned size = cs->info.local_size + variable_shared_mem; agx_usc_pack(&b, SHARED, cfg) { @@ -4185,6 +4301,291 @@ util_draw_multi_upload_indirect(struct pipe_context *pctx, pctx->draw_vbo(pctx, info, 0, &indirect_, draws, 1); } +static void +agx_upload_draw_params(struct agx_batch *batch, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + const struct pipe_draw_info *info) +{ + if (indirect) { + struct agx_resource *indirect_rsrc = agx_resource(indirect->buffer); + uint64_t address = indirect_rsrc->bo->ptr.gpu + indirect->offset; + agx_batch_reads(batch, indirect_rsrc); + + /* To implement draw parameters, we use the last 2 words of the + * indirect draw descriptor. Offset by 3 words for indexed draw (5 + * total) and 2 words for non-indexed (4 total). See the layouts of + * indexed vs non-indexed draw descriptors. + * + * This gives us a consistent layout + * + * uint32_t first_vertex; + * uint32_t base_instance; + * + * and we can implement load_first_vertex & load_base_instance without + * checking for indexing. + */ + uint32_t offset = info->index_size ? 
3 : 2; + batch->uniforms.tables[AGX_SYSVAL_TABLE_PARAMS] = address + offset * 4; + } else { + /* Upload just those two words. */ + uint32_t params[2] = { + info->index_size ? draws->index_bias : draws->start, + info->start_instance, + }; + + batch->uniforms.tables[AGX_SYSVAL_TABLE_PARAMS] = + agx_pool_upload_aligned(&batch->pool, params, sizeof(params), 4); + } +} + +static void +agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + struct agx_device *dev = agx_device(ctx->base.screen); + perf_debug(dev, "Tessellation"); + + struct agx_uncompiled_shader *tcs = ctx->stage[MESA_SHADER_TESS_CTRL].shader; + struct agx_uncompiled_shader *tes = ctx->stage[MESA_SHADER_TESS_EVAL].shader; + + assert(tes != NULL && "required with patches"); + + unsigned patch_vertices = ctx->patch_vertices; + + /* OpenGL allows omitting the tcs, fill in a passthrough program if needed. + * In principle, we could optimize this case, but I don't think it matters. + */ + bool unbind_tcs_when_done = false; + if (!tcs) { + struct agx_uncompiled_shader *vs = ctx->stage[MESA_SHADER_VERTEX].shader; + + assert(patch_vertices >= 1 && + patch_vertices <= ARRAY_SIZE(vs->passthrough_tcs)); + + if (!vs->passthrough_tcs[patch_vertices - 1]) { + struct blob_reader reader; + blob_reader_init(&reader, vs->early_serialized_nir.data, + vs->early_serialized_nir.size); + nir_shader *vs_nir = nir_deserialize(NULL, &agx_nir_options, &reader); + nir_shader *nir = nir_create_passthrough_tcs(&agx_nir_options, vs_nir, + patch_vertices); + ralloc_free(vs_nir); + + /* Lower the tess level sysvals and gather info, since mesa/st won't do + * either for us. 
+ */ + NIR_PASS(_, nir, nir_lower_system_values); + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + vs->passthrough_tcs[patch_vertices - 1] = + pipe_shader_from_nir(&ctx->base, nir); + } + + tcs = vs->passthrough_tcs[patch_vertices - 1]; + ctx->base.bind_tcs_state(&ctx->base, tcs); + unbind_tcs_when_done = true; + } + + unsigned in_vertices = draws->count; + unsigned in_patches = in_vertices / patch_vertices; + + if (in_patches == 0) + return; + + struct agx_batch *batch = agx_get_compute_batch(ctx); + agx_batch_init_state(batch); + + struct pipe_resource *heap = + pipe_buffer_create(ctx->base.screen, PIPE_BIND_GLOBAL, PIPE_USAGE_DEFAULT, + 1024 * 1024 * 128); + + uint64_t heap_gpu = agx_resource(heap)->bo->ptr.gpu; + uint8_t *heap_cpu = agx_resource(heap)->bo->ptr.cpu; + + unsigned unrolled_patch_count = in_patches * info->instance_count; + + uint32_t heap_water = 0; + uint32_t tcs_out_offs = heap_water; + heap_water += ALIGN(unrolled_patch_count * tcs->tess.output_stride, 4); + + agx_batch_writes(batch, agx_resource(heap), 0); + + uint64_t ib = 0; + size_t ib_extent = 0; + + if (info->index_size) + ib = agx_index_buffer_ptr(batch, info, draws, &ib_extent); + + agx_upload_ia_params(batch, info, indirect, ib, ib_extent, 0); + agx_upload_draw_params(batch, indirect, draws, info); + + /* Setup parameters */ + struct agx_tess_params tess_params = { + .tcs_buffer = heap_gpu + tcs_out_offs, + .input_patch_size = patch_vertices, + .output_patch_size = tcs->tess.output_patch_size, + .tcs_patch_constants = tcs->tess.nr_patch_outputs, + .tcs_per_vertex_outputs = tcs->tess.per_vertex_outputs, + .patch_coord_buffer = heap_gpu, + .patches_per_instance = in_patches, + }; + + memcpy(&tess_params.tess_level_outer_default, ctx->default_outer_level, + sizeof(ctx->default_outer_level)); + memcpy(&tess_params.tess_level_inner_default, ctx->default_inner_level, + sizeof(ctx->default_inner_level)); + + batch->uniforms.tess_params = + agx_pool_upload(&batch->pool, 
&tess_params, sizeof(tess_params)); + + /* Run VS+TCS as compute */ + agx_upload_vbos(batch); + agx_update_vs(ctx); + agx_update_tcs(ctx, info); + /* XXX */ + ctx->stage[PIPE_SHADER_TESS_CTRL].dirty = ~0; + ctx->stage[PIPE_SHADER_TESS_EVAL].dirty = ~0; + agx_update_descriptors(batch, ctx->vs, PIPE_SHADER_VERTEX); + agx_update_descriptors(batch, ctx->tcs, PIPE_SHADER_TESS_CTRL); + + struct pipe_grid_info tcs_grid = { + .block = {MAX2(patch_vertices, tcs->tess.output_patch_size), 1, 1}, + .grid = {in_patches, info->instance_count, 1}, + /* XXX */ + .variable_shared_mem = 32768, + }; + + agx_launch(batch, &tcs_grid, ctx->tcs, PIPE_SHADER_TESS_CTRL); + + agx_flush_all(ctx, "HACK"); + agx_sync_all(ctx, "HACK"); + + /* Setup batch */ + batch = agx_get_batch(ctx); + + enum tess_primitive_mode mode = + MAX2(tcs->tess.primitive, tes->tess.primitive); + enum gl_tess_spacing spacing = MAX2(tcs->tess.spacing, tes->tess.spacing); + + enum pipe_tess_spacing pspacing = spacing == TESS_SPACING_EQUAL + ? PIPE_TESS_SPACING_EQUAL + : spacing == TESS_SPACING_FRACTIONAL_ODD + ? PIPE_TESS_SPACING_FRACTIONAL_ODD + : PIPE_TESS_SPACING_FRACTIONAL_EVEN; + + bool point_mode = MAX2(tcs->tess.point_mode, tes->tess.point_mode); + enum mesa_prim in_prim = mode == TESS_PRIMITIVE_ISOLINES ? MESA_PRIM_LINES + : mode == TESS_PRIMITIVE_QUADS + ? MESA_PRIM_QUADS + : MESA_PRIM_TRIANGLES; + enum mesa_prim out_prim = point_mode ? MESA_PRIM_POINTS + : mode == TESS_PRIMITIVE_ISOLINES + ? 
MESA_PRIM_LINES + : MESA_PRIM_TRIANGLES; + + struct pipe_tessellator *tess = + p_tess_init(in_prim, pspacing, tes->tess.ccw, point_mode); + + struct pipe_tessellator_data data = {0}; + + /* Mem allocate */ + uint32_t patch_coord_offs_offs = heap_water; + tess_params.patch_coord_offs = heap_gpu + heap_water; + heap_water += align(4 * unrolled_patch_count, 4); + + uint32_t draws_off = heap_water; + uint32_t *patch_draws = (uint32_t *)(heap_cpu + heap_water); + heap_water += align(sizeof(uint32_t) * 5 * unrolled_patch_count, 4); + + uint32_t *patch_offs = (uint32_t *)(heap_cpu + patch_coord_offs_offs); + + for (unsigned patch = 0; patch < unrolled_patch_count; ++patch) { + float *addr = + (float *)(heap_cpu + tcs_out_offs + tcs->tess.output_stride * patch); + + struct pipe_tessellation_factors factors = { + .outer_tf = {addr[0], addr[1], addr[2], addr[3]}, + .inner_tf = {addr[4], addr[5]}, + }; + p_tessellate(tess, &factors, &data); + + /* Mem allocate indices */ + uint32_t index_off = heap_water; + uint16_t *indices = (uint16_t *)(heap_cpu + heap_water); + heap_water += align(sizeof(*indices) * data.num_indices, 4); + + for (unsigned idx = 0; idx < data.num_indices; ++idx) { + indices[idx] = data.indices[idx]; + } + + /* Mem allocate patch coords */ + heap_water = align(heap_water, 8); + patch_offs[patch] = heap_water / 8; + float *patch_coords = (float *)(heap_cpu + heap_water); + heap_water += align(8 * data.num_domain_points, 4); + + for (unsigned p = 0; p < data.num_domain_points; ++p) { + patch_coords[2 * p + 0] = data.domain_points_u[p]; + patch_coords[2 * p + 1] = data.domain_points_v[p]; + } + assert(data.num_indices < 32768); + assert(data.num_domain_points < 8192); + + /* Generate a draw for the patch */ + uint32_t *desc = patch_draws + (patch * 5); + + desc[0] = data.num_indices; /* count */ + desc[1] = 1; /* instance_count */ + desc[2] = index_off / sizeof(*indices); /* start */ + desc[3] = patch * LIBAGX_TES_PATCH_ID_STRIDE; /* index_bias */ + desc[4] = 
0; /* start_instance */
+   }
+   p_tess_destroy(tess);
+
+   /* Run TES as VS */
+   agx_batch_init_state(batch);
+   void *vs_cso = ctx->stage[PIPE_SHADER_VERTEX].shader;
+   ctx->base.bind_vs_state(&ctx->base,
+                           ctx->stage[PIPE_SHADER_TESS_EVAL].shader);
+   agx_update_vs(ctx);
+   agx_update_descriptors(batch, ctx->vs, PIPE_SHADER_TESS_EVAL);
+
+   struct pipe_draw_info draw_info = {
+      .mode = out_prim,
+      .index_size = 2,
+      .index.resource = heap,
+      .instance_count = 1,
+      .view_mask = info->view_mask,
+   };
+
+   /* Wrap the pool allocation in a fake resource for meta-Gallium use */
+   struct pipe_draw_indirect_info copy_indirect = {
+      .buffer = heap,
+      .offset = draws_off,
+      .stride = 5 * sizeof(uint32_t),
+      .draw_count = in_patches * info->instance_count,
+   };
+
+   batch->uniforms.tess_params =
+      agx_pool_upload(&batch->pool, &tess_params, sizeof(tess_params));
+
+   ctx->base.draw_vbo(&ctx->base, &draw_info, 0, &copy_indirect, NULL, 1);
+
+   /* Restore vertex state */
+   ctx->base.bind_vs_state(&ctx->base, vs_cso);
+
+   pipe_resource_reference(&heap, NULL);
+
+   if (unbind_tcs_when_done) {
+      ctx->base.bind_tcs_state(&ctx->base, NULL);
+   }
+}
+
 static void
 agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
              unsigned drawid_offset,
@@ -4205,7 +4606,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
       assert(drawid_offset == 0);
       assert(num_draws == 1);
 
-      util_draw_multi_upload_indirect(pctx, info, indirect, draws);
+      util_draw_multi_upload_indirect(pctx, info, indirect, draws);
       return;
    }
@@ -4214,6 +4615,18 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
      return;
   }
 
+   /* TODO: stop cheating */
+   if (info->mode == MESA_PRIM_PATCHES && indirect) {
+      perf_debug_ctx(ctx, "indirect tessellation");
+      util_draw_indirect(pctx, info, indirect);
+      return;
+   }
+
+   if (info->mode == MESA_PRIM_PATCHES) {
+      agx_draw_patches(ctx, info, drawid_offset, indirect, draws, num_draws);
+      return;
+   }
+
    if (agx_needs_passthrough_gs(ctx, info, 
indirect)) { agx_apply_passthrough_gs(ctx, info, drawid_offset, indirect, draws, num_draws); @@ -4330,39 +4742,9 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, enum mesa_prim mode = info->mode; if (ctx->vs->info.uses_base_param || ctx->gs) { + agx_upload_draw_params(batch, indirect, draws, info); + batch->uniforms.is_indexed_draw = (idx_size > 0); - - if (indirect) { - struct agx_resource *indirect_rsrc = agx_resource(indirect->buffer); - uint64_t address = indirect_rsrc->bo->ptr.gpu + indirect->offset; - agx_batch_reads(batch, indirect_rsrc); - - /* To implement draw parameters, we use the last 2 words of the - * indirect draw descriptor. Offset by 3 words for indexed draw (5 - * total) and 2 words for non-indexed (4 total). See the layouts of - * indexed vs non-indexed draw descriptors. - * - * This gives us a consistent layout - * - * uint32_t first_vertex; - * uint32_t base_instance; - * - * and we can implement load_first_vertex & load_base_instance without - * checking for indexing. - */ - uint32_t offset = idx_size ? 3 : 2; - batch->uniforms.tables[AGX_SYSVAL_TABLE_PARAMS] = address + offset * 4; - } else { - /* Upload just those two words. */ - uint32_t params[2] = { - idx_size ? 
draws->index_bias : draws->start, - info->start_instance, - }; - - batch->uniforms.tables[AGX_SYSVAL_TABLE_PARAMS] = - agx_pool_upload_aligned(&batch->pool, params, sizeof(params), 4); - } - ctx->dirty |= AGX_DIRTY_VS; } @@ -4774,6 +5156,8 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->create_vertex_elements_state = agx_create_vertex_elements; ctx->create_vs_state = agx_create_shader_state; ctx->create_gs_state = agx_create_shader_state; + ctx->create_tcs_state = agx_create_shader_state; + ctx->create_tes_state = agx_create_shader_state; ctx->create_compute_state = agx_create_compute_state; ctx->bind_blend_state = agx_bind_blend_state; ctx->bind_depth_stencil_alpha_state = agx_bind_zsa_state; @@ -4783,6 +5167,8 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->bind_vertex_elements_state = agx_bind_vertex_elements_state; ctx->bind_vs_state = agx_bind_vs_state; ctx->bind_gs_state = agx_bind_gs_state; + ctx->bind_tcs_state = agx_bind_tcs_state; + ctx->bind_tes_state = agx_bind_tes_state; ctx->bind_compute_state = agx_bind_cs_state; ctx->delete_blend_state = agx_delete_state; ctx->delete_depth_stencil_alpha_state = agx_delete_state; @@ -4793,6 +5179,8 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->delete_vertex_elements_state = agx_delete_state; ctx->delete_vs_state = agx_delete_shader_state; ctx->delete_gs_state = agx_delete_shader_state; + ctx->delete_tcs_state = agx_delete_shader_state; + ctx->delete_tes_state = agx_delete_shader_state; ctx->set_blend_color = agx_set_blend_color; ctx->set_clip_state = agx_set_clip_state; ctx->set_constant_buffer = agx_set_constant_buffer; @@ -4801,6 +5189,7 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->set_sampler_views = agx_set_sampler_views; ctx->set_framebuffer_state = agx_set_framebuffer_state; ctx->set_polygon_stipple = agx_set_polygon_stipple; + ctx->set_patch_vertices = agx_set_patch_vertices; ctx->set_sample_mask = agx_set_sample_mask; ctx->set_scissor_states = 
agx_set_scissor_states; ctx->set_stencil_ref = agx_set_stencil_ref; @@ -4813,4 +5202,5 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->set_global_binding = agx_set_global_binding; ctx->texture_barrier = agx_texture_barrier; ctx->get_compute_state_info = agx_get_compute_state_info; + ctx->set_tess_state = agx_set_tess_state; } diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 6f2ded18bb2..e6a229bf836 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -106,6 +106,9 @@ struct PACKED agx_draw_uniforms { /* Address of input assembly buffer if geom/tess is used, else 0 */ uint64_t input_assembly; + /* Address of tessellation param buffer if tessellation is used, else 0 */ + uint64_t tess_params; + /* Address of geometry param buffer if geometry shaders are used, else 0 */ uint64_t geometry_params; @@ -213,6 +216,7 @@ struct agx_uncompiled_shader { struct agx_uncompiled_shader_info info; struct hash_table *variants; struct agx_uncompiled_shader *passthrough_progs[MESA_PRIM_COUNT][3][2]; + struct agx_uncompiled_shader *passthrough_tcs[32]; uint32_t xfb_strides[4]; bool has_xfb_info; @@ -222,6 +226,18 @@ struct agx_uncompiled_shader { /* Set on VS, passed to FS for linkage */ unsigned base_varying; + + /* Tessellation info */ + struct { + uint64_t per_vertex_outputs; + uint32_t output_stride; + enum gl_tess_spacing spacing; + enum tess_primitive_mode primitive; + uint8_t output_patch_size; + uint8_t nr_patch_outputs; + bool ccw; + bool point_mode; + } tess; }; enum agx_stage_dirty { @@ -407,6 +423,18 @@ struct asahi_fs_shader_key { enum pipe_format rt_formats[PIPE_MAX_COLOR_BUFS]; }; +struct asahi_tcs_shader_key { + /* Input assembly key. Simplified because we know we're operating on patches. + */ + uint8_t index_size_B; + + /* Vertex shader key */ + struct agx_attribute attribs[AGX_MAX_VBUFS]; + + /* Tessellation control shaders must be linked with a vertex shader. 
*/ + uint8_t input_nir_sha1[20]; +}; + struct asahi_gs_shader_key { /* Input assembly key */ struct agx_ia_key ia; @@ -426,6 +454,7 @@ struct asahi_gs_shader_key { union asahi_shader_key { struct asahi_vs_shader_key vs; + struct asahi_tcs_shader_key tcs; struct asahi_gs_shader_key gs; struct asahi_fs_shader_key fs; }; @@ -498,7 +527,7 @@ struct asahi_blitter { struct agx_context { struct pipe_context base; - struct agx_compiled_shader *vs, *fs, *gs; + struct agx_compiled_shader *vs, *fs, *gs, *tcs, *tes; uint32_t dirty; /* Heap for dynamic memory allocation for geometry/tessellation shaders */ @@ -527,6 +556,10 @@ struct agx_context { struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; uint32_t vb_mask; + unsigned patch_vertices; + float default_outer_level[4]; + float default_inner_level[2]; + struct agx_stage stage[PIPE_SHADER_TYPES]; struct agx_attribute *attributes; struct agx_rasterizer *rast;