diff --git a/src/asahi/lib/agx_ppp.h b/src/asahi/lib/agx_ppp.h new file mode 100644 index 00000000000..2b38f7f054c --- /dev/null +++ b/src/asahi/lib/agx_ppp.h @@ -0,0 +1,141 @@ +/* + * Copyright 2022 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef AGX_PPP_H +#define AGX_PPP_H + +#include "asahi/lib/agx_pack.h" + +/* Opaque structure representing a PPP update */ +struct agx_ppp_update { + uint8_t *head; + uint64_t gpu_base; + size_t total_size; + +#ifndef NDEBUG + uint8_t *cpu_base; +#endif +}; + +static size_t +agx_ppp_update_size(struct AGX_PPP_HEADER *present) +{ + size_t size = AGX_PPP_HEADER_LENGTH; + +#define PPP_CASE(x, y) if (present->x) size += AGX_ ## y ##_LENGTH; + PPP_CASE(fragment_control, FRAGMENT_CONTROL); + PPP_CASE(fragment_control_2, FRAGMENT_CONTROL_2); + PPP_CASE(fragment_front_face, FRAGMENT_FACE); + PPP_CASE(fragment_front_face_2, FRAGMENT_FACE_2); + PPP_CASE(fragment_front_stencil, FRAGMENT_STENCIL); + PPP_CASE(fragment_back_face, FRAGMENT_FACE); + PPP_CASE(fragment_back_face_2, FRAGMENT_FACE_2); + PPP_CASE(fragment_back_stencil, FRAGMENT_STENCIL); + PPP_CASE(depth_bias_scissor, DEPTH_BIAS_SCISSOR); + PPP_CASE(region_clip, REGION_CLIP); + PPP_CASE(viewport, VIEWPORT); + PPP_CASE(w_clamp, W_CLAMP); + PPP_CASE(output_select, OUTPUT_SELECT); + PPP_CASE(varying_word_0, VARYING_0); + PPP_CASE(varying_word_1, VARYING_1); + PPP_CASE(cull, CULL); + PPP_CASE(cull_2, CULL_2); + PPP_CASE(fragment_shader, FRAGMENT_SHADER); + PPP_CASE(occlusion_query, FRAGMENT_OCCLUSION_QUERY); + PPP_CASE(occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2); + PPP_CASE(output_unknown, OUTPUT_UNKNOWN); + PPP_CASE(output_size, OUTPUT_SIZE); + PPP_CASE(varying_word_2, VARYING_2); +#undef PPP_CASE + + assert((size % 4) == 0 && "PPP updates are aligned"); + return size; +} + +static inline bool +agx_ppp_validate(struct agx_ppp_update *ppp, size_t size) +{ +#ifndef NDEBUG + /* Assert that we don't overflow. Ideally we'd assert that types match too + * but that's harder to do at the moment. + */ + assert(((ppp->head - ppp->cpu_base) + size) <= ppp->total_size); +#endif + + return true; +} + +#define agx_ppp_push(ppp, T, name) \ + for (bool it = agx_ppp_validate((ppp), AGX_##T##_LENGTH); it; it = false, \ + (ppp)->head += AGX_##T##_LENGTH) \ + agx_pack((ppp)->head, T, name) + +#define agx_ppp_push_packed(ppp, src, T) do { \ + agx_ppp_validate((ppp), AGX_##T##_LENGTH); \ + memcpy((ppp)->head, src, AGX_##T##_LENGTH); \ + (ppp)->head += AGX_##T##_LENGTH; \ + } while(0) \ + +static inline struct agx_ppp_update +agx_new_ppp_update(struct agx_pool *pool, struct AGX_PPP_HEADER present) +{ + size_t size = agx_ppp_update_size(&present); + struct agx_ptr T = agx_pool_alloc_aligned(pool, size, 64); + + struct agx_ppp_update ppp = { + .gpu_base = T.gpu, + .head = T.cpu, + .total_size = size, +#ifndef NDEBUG + .cpu_base = T.cpu, +#endif + }; + + agx_ppp_push(&ppp, PPP_HEADER, cfg) { cfg = present; } + + return ppp; +} + +static inline void +agx_ppp_fini(uint8_t **out, struct agx_ppp_update *ppp) +{ + size_t size = ppp->total_size; + assert((size % 4) == 0); + size_t size_words = size / 4; + +#ifndef NDEBUG + assert(size == (ppp->head - ppp->cpu_base) && "mismatched ppp size"); +#endif + + assert(ppp->gpu_base < (1ull << 40)); + assert(size_words < (1ull << 24)); + + agx_pack(*out, RECORD, cfg) { + cfg.pointer_hi = (ppp->gpu_base >> 32); + cfg.pointer_lo = (uint32_t) ppp->gpu_base; + cfg.size_words = size_words; + }; + + *out += AGX_RECORD_LENGTH; +} + +#endif diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index 3eb4d52eb1b..1ab092579be 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -288,33 +288,6 @@ - - - - - - - - - - - - - - - - - - - - - - @@ -332,7 +305,53 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -342,7 +361,12 @@ - + + + + + + @@ -351,7 +375,7 @@ - + @@ -361,71 +385,90 @@ - + + + + + + + + + + - - - - - - - - + + - - - + + - - - - - - - - - - - - + + + + + + + + - - - - - - - - - + + - - - + + - - - - - - - + + + + + + + + + + + + + + + + + + + + - - - - + + + + + + + + + + + + + + + + + + + @@ -531,22 +574,6 @@ - - - - - - - - - - - - - - - diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index d8980d49a37..0f3f36f252c 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -253,7 +253,6 @@ agxdecode_map_read_write(void) #define agxdecode_msg(str) fprintf(agxdecode_dump_stream, "// %s", str) unsigned agxdecode_indent = 0; -uint64_t pipeline_base = 0; static void agxdecode_dump_bo(struct agx_bo *bo, const char *name) @@ -395,60 +394,70 @@ agxdecode_pipeline(const uint8_t *map, UNUSED bool verbose) } } +#define PPP_PRINT(map, header_name, struct_name, human) \ + if (hdr.header_name) { \ + assert(((map + AGX_##struct_name##_LENGTH) <= (base + size)) && \ + "buffer overrun in PPP update"); \ + DUMP_CL(struct_name, map, human); \ + map += AGX_##struct_name##_LENGTH; \ + } + static void agxdecode_record(uint64_t va, size_t size, bool verbose) { - uint8_t *map = agxdecode_fetch_gpu_mem(va, size); - uint32_t tag = 0; - memcpy(&tag, map, 4); + uint8_t *base = agxdecode_fetch_gpu_mem(va, size); + uint8_t *map = base; - if (tag == 0x00000C00) { - assert(size == AGX_VIEWPORT_LENGTH); - DUMP_CL(VIEWPORT, map, "Viewport"); - } else if (tag == 0x100C0000) { - assert(size == AGX_INTERPOLATION_LENGTH); - DUMP_CL(INTERPOLATION, map, "Interpolation"); - } else if (tag == 0x0C020000) { - assert(size == AGX_LINKAGE_LENGTH); - DUMP_CL(LINKAGE, map, "Linkage"); - } else if (tag == 0x200004a) { - assert(size == AGX_UNKNOWN_4A_LENGTH); - DUMP_CL(UNKNOWN_4A, map, "Unknown 4a"); - } else if (tag == 0x10000b5) { - assert(size == AGX_RASTERIZER_LENGTH); - DUMP_CL(RASTERIZER, map, "Rasterizer"); - } else if (tag == 0x200000) { - assert(size == AGX_CULL_LENGTH); - DUMP_CL(CULL, map, "Cull"); - } else if (tag == 0x000100) { - assert(size == AGX_SET_INDEX_LENGTH); - DUMP_CL(SET_INDEX, map, "Set index"); - } else if (tag == 0x800000) { - assert(size == AGX_BIND_FRAGMENT_PIPELINE_LENGTH); + agx_unpack(agxdecode_dump_stream, map, PPP_HEADER, hdr); + map += AGX_PPP_HEADER_LENGTH; - agx_unpack(agxdecode_dump_stream, map, BIND_FRAGMENT_PIPELINE, cmd); - agxdecode_stateful(cmd.pipeline, "Pipeline", agxdecode_pipeline, verbose); + PPP_PRINT(map, fragment_control, FRAGMENT_CONTROL, "Fragment control"); + PPP_PRINT(map, fragment_control_2, FRAGMENT_CONTROL_2, "Fragment control 2"); + PPP_PRINT(map, fragment_front_face, FRAGMENT_FACE, "Front face"); + PPP_PRINT(map, fragment_front_face_2, FRAGMENT_FACE_2, "Front face 2"); + PPP_PRINT(map, fragment_front_stencil, FRAGMENT_STENCIL, "Front stencil"); + PPP_PRINT(map, fragment_back_face, FRAGMENT_FACE, "Back face"); + PPP_PRINT(map, fragment_back_face_2, FRAGMENT_FACE_2, "Back face 2"); + PPP_PRINT(map, fragment_back_stencil, FRAGMENT_STENCIL, "Back stencil"); + PPP_PRINT(map, depth_bias_scissor, DEPTH_BIAS_SCISSOR, "Depth bias/scissor"); + PPP_PRINT(map, region_clip, REGION_CLIP, "Region clip"); + PPP_PRINT(map, viewport, VIEWPORT, "Viewport"); + PPP_PRINT(map, w_clamp, W_CLAMP, "W clamp"); + PPP_PRINT(map, output_select, OUTPUT_SELECT, "Output select"); + PPP_PRINT(map, varying_word_0, VARYING_0, "Varying word 0"); + PPP_PRINT(map, varying_word_1, VARYING_1, "Varying word 1"); + PPP_PRINT(map, cull, CULL, "Cull"); + PPP_PRINT(map, cull_2, CULL_2, "Cull 2"); - if (cmd.cf_bindings) { - uint8_t *map = agxdecode_fetch_gpu_mem(cmd.cf_bindings, 128); - hexdump(agxdecode_dump_stream, map, 128, false); + if (hdr.fragment_shader) { + agx_unpack(agxdecode_dump_stream, map, FRAGMENT_SHADER, frag); + agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_pipeline, verbose); - DUMP_CL(CF_BINDING_HEADER, map, "Coefficient binding header:"); - map += AGX_CF_BINDING_HEADER_LENGTH; + if (frag.cf_bindings) { + uint8_t *cf = agxdecode_fetch_gpu_mem(frag.cf_bindings, 128); + hexdump(agxdecode_dump_stream, cf, 128, false); - for (unsigned i = 0; i < cmd.cf_binding_count; ++i) { - DUMP_CL(CF_BINDING, map, "Coefficient binding:"); - map += AGX_CF_BINDING_LENGTH; + DUMP_CL(CF_BINDING_HEADER, cf, "Coefficient binding header:"); + cf += AGX_CF_BINDING_HEADER_LENGTH; + + for (unsigned i = 0; i < frag.cf_binding_count; ++i) { + DUMP_CL(CF_BINDING, cf, "Coefficient binding:"); + cf += AGX_CF_BINDING_LENGTH; } } - DUMP_UNPACKED(BIND_FRAGMENT_PIPELINE, cmd, "Bind fragment pipeline\n"); - } else if (size == 0) { - pipeline_base = va; - } else { - fprintf(agxdecode_dump_stream, "Record %" PRIx64 "\n", va); - hexdump(agxdecode_dump_stream, map, size, false); + DUMP_UNPACKED(FRAGMENT_SHADER, frag, "Fragment shader\n"); + map += AGX_FRAGMENT_SHADER_LENGTH; } + + PPP_PRINT(map, occlusion_query, FRAGMENT_OCCLUSION_QUERY, "Occlusion query"); + PPP_PRINT(map, occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2, "Occlusion query 2"); + PPP_PRINT(map, output_unknown, OUTPUT_UNKNOWN, "Output unknown"); + PPP_PRINT(map, output_size, OUTPUT_SIZE, "Output size"); + PPP_PRINT(map, varying_word_2, VARYING_2, "Varying word 2"); + + /* PPP print checks we don't read too much, now check we read enough */ + assert(map == (base + size) && "invalid size of PPP update"); } static unsigned diff --git a/src/asahi/lib/meson.build b/src/asahi/lib/meson.build index 61d86eaed20..9452bba71f3 100644 --- a/src/asahi/lib/meson.build +++ b/src/asahi/lib/meson.build @@ -24,6 +24,7 @@ dep_iokit = dependency('IOKit', required : false) libasahi_lib_files = files( 'agx_device.c', 'agx_formats.c', + 'agx_ppp.h', 'pool.c', ) diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 9c1495fa11d..86868ef2ceb 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -43,6 +43,7 @@ #include "agx_state.h" #include "asahi/lib/agx_pack.h" #include "asahi/lib/agx_formats.h" +#include "asahi/lib/agx_ppp.h" static struct pipe_stream_output_target * agx_create_stream_output_target(struct pipe_context *pctx, @@ -628,19 +629,14 @@ agx_set_viewport_states(struct pipe_context *pctx, ctx->viewport = *vp; } -struct agx_viewport_scissor { - uint64_t viewport; - unsigned scissor; -}; - -static struct agx_viewport_scissor +static void agx_upload_viewport_scissor(struct agx_pool *pool, struct agx_batch *batch, + uint8_t **out, const struct pipe_viewport_state *vp, - const struct pipe_scissor_state *ss) + const struct pipe_scissor_state *ss, + unsigned zbias) { - struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_VIEWPORT_LENGTH, 64); - float trans_x = vp->translate[0], trans_y = vp->translate[1]; float abs_scale_x = fabsf(vp->scale[0]), abs_scale_y = fabsf(vp->scale[1]); @@ -665,21 +661,6 @@ agx_upload_viewport_scissor(struct agx_pool *pool, float minz, maxz; util_viewport_zmin_zmax(vp, false, &minz, &maxz); - agx_pack(T.cpu, VIEWPORT, cfg) { - cfg.min_tile_x = minx / 32; - cfg.min_tile_y = miny / 32; - cfg.max_tile_x = DIV_ROUND_UP(maxx, 32); - cfg.max_tile_y = DIV_ROUND_UP(maxy, 32); - cfg.clip_tile = true; - - cfg.translate_x = vp->translate[0]; - cfg.translate_y = vp->translate[1]; - cfg.translate_z = vp->translate[2]; - cfg.scale_x = vp->scale[0]; - cfg.scale_y = vp->scale[1]; - cfg.scale_z = vp->scale[2]; - } - /* Allocate a new scissor descriptor */ struct agx_scissor_packed *ptr = batch->scissor.bo->ptr.cpu; unsigned index = (batch->scissor.count++); @@ -693,10 +674,36 @@ agx_upload_viewport_scissor(struct agx_pool *pool, cfg.max_z = maxz; } - return (struct agx_viewport_scissor) { - .viewport = T.gpu, - .scissor = index + /* Upload state */ + struct agx_ppp_update ppp = agx_new_ppp_update(pool, (struct AGX_PPP_HEADER) { + .depth_bias_scissor = true, + .region_clip = true, + .viewport = true, + }); + + agx_ppp_push(&ppp, DEPTH_BIAS_SCISSOR, cfg) { + cfg.scissor = index; + cfg.depth_bias = zbias; }; + + agx_ppp_push(&ppp, REGION_CLIP, cfg) { + cfg.enable = true; + cfg.min_x = minx / 32; + cfg.min_y = miny / 32; + cfg.max_x = DIV_ROUND_UP(maxx, 32); + cfg.max_y = DIV_ROUND_UP(maxy, 32); + } + + agx_ppp_push(&ppp, VIEWPORT, cfg) { + cfg.translate_x = vp->translate[0]; + cfg.translate_y = vp->translate[1]; + cfg.translate_z = vp->translate[2]; + cfg.scale_x = vp->scale[0]; + cfg.scale_y = vp->scale[1]; + cfg.scale_z = vp->scale[2]; + } + + agx_ppp_fini(out, &ppp); } static uint16_t @@ -1441,90 +1448,6 @@ agx_build_store_pipeline(struct agx_context *ctx, uint32_t code, return ptr.gpu; } -static uint64_t -demo_launch_fragment(struct agx_context *ctx, struct agx_pool *pool, uint32_t pipeline, uint32_t varyings, unsigned input_count) -{ - struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_BIND_FRAGMENT_PIPELINE_LENGTH, 64); - - unsigned tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count; - agx_pack(t.cpu, BIND_FRAGMENT_PIPELINE, cfg) { - cfg.groups_of_8_immediate_textures = DIV_ROUND_UP(tex_count, 8); - cfg.groups_of_4_samplers = DIV_ROUND_UP(tex_count, 4); - cfg.more_than_4_textures = tex_count >= 4; - cfg.cf_binding_count = input_count; - cfg.pipeline = pipeline; - cfg.cf_bindings = varyings; - }; - - return t.gpu; -} - -static uint64_t -demo_interpolation(struct agx_varyings_vs *vs, struct agx_pool *pool) -{ - struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_INTERPOLATION_LENGTH, 64); - - agx_pack(t.cpu, INTERPOLATION, cfg) { - cfg.varying_count = agx_num_general_outputs(vs); - }; - - return t.gpu; -} - -static uint64_t -demo_linkage(struct agx_compiled_shader *vs, struct agx_compiled_shader *fs, struct agx_pool *pool) -{ - struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_LINKAGE_LENGTH, 64); - - agx_pack(t.cpu, LINKAGE, cfg) { - cfg.varying_count = vs->info.varyings.vs.nr_index; - cfg.any_varyings = !!fs->info.varyings.fs.nr_bindings; - cfg.has_point_size = vs->info.writes_psiz; - cfg.has_frag_coord_z = fs->info.varyings.fs.reads_z; - }; - - return t.gpu; -} - -static uint64_t -demo_rasterizer(struct agx_context *ctx, struct agx_pool *pool, bool is_points) -{ - struct agx_rasterizer *rast = ctx->rast; - struct agx_rasterizer_packed out; - - agx_pack(&out, RASTERIZER, cfg) { - cfg.common.stencil_test_enable = ctx->zs->base.stencil[0].enabled; - cfg.common.two_sided_stencil = ctx->zs->base.stencil[1].enabled; - - cfg.front.stencil_reference = ctx->stencil_ref.ref_value[0]; - cfg.back.stencil_reference = cfg.common.two_sided_stencil ? - ctx->stencil_ref.ref_value[1] : - cfg.front.stencil_reference; - - cfg.front.line_width = cfg.back.line_width = rast->line_width; - cfg.front.polygon_mode = cfg.back.polygon_mode = AGX_POLYGON_MODE_FILL; - - cfg.common.unk_fill_lines = is_points; /* XXX: what is this? */ - - /* Always enable scissoring so we may scissor to the viewport (TODO: - * optimize this out if the viewport is the default and the app does not - * use the scissor test) */ - cfg.common.scissor_enable = true; - - cfg.common.depth_bias_enable = rast->base.offset_tri; - }; - - /* Words 2-3: front */ - out.opaque[2] |= ctx->zs->depth.opaque[0]; - out.opaque[3] |= ctx->zs->front_stencil.opaque[0]; - - /* Words 4-5: back */ - out.opaque[4] |= ctx->zs->depth.opaque[0]; - out.opaque[5] |= ctx->zs->back_stencil.opaque[0]; - - return agx_pool_upload_aligned(pool, &out, sizeof(out), 64); -} - static enum agx_object_type agx_point_object_type(struct agx_rasterizer *rast) { @@ -1533,73 +1456,13 @@ agx_point_object_type(struct agx_rasterizer *rast) AGX_OBJECT_TYPE_POINT_SPRITE_UV10; } -static uint64_t -demo_unk11(struct agx_pool *pool, struct agx_rasterizer *rast, - bool prim_lines, bool prim_points, bool reads_tib, - bool sample_mask_from_shader, bool no_colour_output) -{ - struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_UNKNOWN_4A_LENGTH, 64); - - agx_pack(T.cpu, UNKNOWN_4A, cfg) { - cfg.no_colour_output = no_colour_output; - cfg.lines_or_points = (prim_lines || prim_points); - cfg.reads_tilebuffer = reads_tib; - cfg.sample_mask_from_shader = sample_mask_from_shader; - - cfg.front.object_type = cfg.back.object_type = - prim_points ? agx_point_object_type(rast) : - prim_lines ? AGX_OBJECT_TYPE_LINE : - AGX_OBJECT_TYPE_TRIANGLE; - }; - - return T.gpu; -} - -static uint64_t -demo_unk12(struct agx_pool *pool) -{ - uint32_t unk[] = { - 0x410000, - 0x1e3ce508, - 0xa0 - }; - - return agx_pool_upload(pool, unk, sizeof(unk)); -} - -static uint64_t -agx_set_index(struct agx_pool *pool, uint16_t scissor, uint16_t zbias) -{ - struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_SET_INDEX_LENGTH, 64); - - agx_pack(T.cpu, SET_INDEX, cfg) { - cfg.scissor = scissor; - cfg.depth_bias = zbias; - }; - - return T.gpu; -} - -static void -agx_push_record(uint8_t **out, unsigned size_words, uint64_t ptr) -{ - assert(ptr < (1ull << 40)); - assert(size_words < (1ull << 24)); - - agx_pack(*out, RECORD, cfg) { - cfg.pointer_hi = (ptr >> 32); - cfg.pointer_lo = (uint32_t) ptr; - cfg.size_words = size_words; - }; - - *out += AGX_RECORD_LENGTH; -} - static uint8_t * agx_encode_state(struct agx_context *ctx, uint8_t *out, uint32_t pipeline_vertex, uint32_t pipeline_fragment, uint32_t varyings, bool is_lines, bool is_points) { + struct agx_rasterizer *rast = ctx->rast; + unsigned tex_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count; agx_pack(out, BIND_VERTEX_PIPELINE, cfg) { cfg.pipeline = pipeline_vertex; @@ -1614,18 +1477,11 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out, out += AGX_BIND_VERTEX_PIPELINE_LENGTH; struct agx_pool *pool = &ctx->batch->pool; + struct agx_compiled_shader *vs = ctx->vs, *fs = ctx->fs; bool reads_tib = ctx->fs->info.reads_tib; bool sample_mask_from_shader = ctx->fs->info.writes_sample_mask; bool no_colour_output = ctx->fs->info.no_colour_output; - agx_push_record(&out, 5, demo_interpolation(&ctx->vs->info.varyings.vs, pool)); - agx_push_record(&out, 5, demo_launch_fragment(ctx, pool, pipeline_fragment, - varyings, ctx->fs->info.varyings.fs.nr_bindings)); - agx_push_record(&out, 4, demo_linkage(ctx->vs, ctx->fs, pool)); - agx_push_record(&out, 7, demo_rasterizer(ctx, pool, is_points)); - agx_push_record(&out, 5, demo_unk11(pool, ctx->rast, is_lines, is_points, reads_tib, - sample_mask_from_shader, no_colour_output)); - unsigned zbias = 0; if (ctx->rast->base.offset_tri) { @@ -1634,16 +1490,117 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out, } if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR_ZBIAS)) { - struct agx_viewport_scissor vps = agx_upload_viewport_scissor(pool, - ctx->batch, &ctx->viewport, - ctx->rast->base.scissor ? &ctx->scissor : NULL); - - agx_push_record(&out, 10, vps.viewport); - agx_push_record(&out, 2, agx_set_index(pool, vps.scissor, zbias)); + agx_upload_viewport_scissor(pool, ctx->batch, &out, &ctx->viewport, + ctx->rast->base.scissor ? &ctx->scissor : NULL, + zbias); } - agx_push_record(&out, 3, demo_unk12(pool)); - agx_push_record(&out, 2, agx_pool_upload(pool, ctx->rast->cull, sizeof(ctx->rast->cull))); + enum agx_object_type object_type = + is_points ? agx_point_object_type(rast) : + is_lines ? AGX_OBJECT_TYPE_LINE : + AGX_OBJECT_TYPE_TRIANGLE; + + /* For now, we re-emit almost all state every draw. TODO: perf */ + struct agx_ppp_update ppp = agx_new_ppp_update(pool, (struct AGX_PPP_HEADER) { + .fragment_control = true, + .fragment_control_2 = true, + .fragment_front_face = true, + .fragment_front_face_2 = true, + .fragment_front_stencil = true, + .fragment_back_face = true, + .fragment_back_face_2 = true, + .fragment_back_stencil = true, + .w_clamp = true, + .output_select = true, + .varying_word_0 = true, + .varying_word_1 = true, + .cull = true, + .cull_2 = true, + .fragment_shader = true, + .occlusion_query = true, + .occlusion_query_2 = true, + .output_unknown = true, + .output_size = true, + .varying_word_2 = true, + }); + + agx_ppp_push(&ppp, FRAGMENT_CONTROL, cfg) { + cfg.stencil_test_enable = ctx->zs->base.stencil[0].enabled; + cfg.two_sided_stencil = ctx->zs->base.stencil[1].enabled; + cfg.depth_bias_enable = rast->base.offset_tri; + + cfg.unk_fill_lines = is_points; /* XXX: what is this? */ + + /* Always enable scissoring so we may scissor to the viewport (TODO: + * optimize this out if the viewport is the default and the app does not + * use the scissor test) */ + cfg.scissor_enable = true; + }; + + agx_ppp_push(&ppp, FRAGMENT_CONTROL_2, cfg) { + cfg.no_colour_output = no_colour_output; + cfg.lines_or_points = (is_lines || is_points); + cfg.reads_tilebuffer = reads_tib; + cfg.sample_mask_from_shader = sample_mask_from_shader; + }; + + struct agx_fragment_face_packed front_face, back_face; + agx_pack(&front_face, FRAGMENT_FACE, cfg) { + cfg.stencil_reference = ctx->stencil_ref.ref_value[0]; + cfg.line_width = rast->line_width; + cfg.polygon_mode = AGX_POLYGON_MODE_FILL; + }; + front_face.opaque[0] |= ctx->zs->depth.opaque[0]; + agx_ppp_push_packed(&ppp, &front_face, FRAGMENT_FACE); + + agx_ppp_push(&ppp, FRAGMENT_FACE_2, cfg) cfg.object_type = object_type; + agx_ppp_push_packed(&ppp, ctx->zs->front_stencil.opaque, FRAGMENT_STENCIL); + + agx_pack(&back_face, FRAGMENT_FACE, cfg) { + bool twosided = ctx->zs->base.stencil[1].enabled; + cfg.stencil_reference = ctx->stencil_ref.ref_value[twosided ? 1 : 0]; + + cfg.line_width = rast->line_width; + cfg.polygon_mode = AGX_POLYGON_MODE_FILL; + }; + back_face.opaque[0] |= ctx->zs->depth.opaque[0]; + agx_ppp_push_packed(&ppp, &back_face, FRAGMENT_FACE); + + agx_ppp_push(&ppp, FRAGMENT_FACE_2, cfg) cfg.object_type = object_type; + agx_ppp_push_packed(&ppp, ctx->zs->back_stencil.opaque, FRAGMENT_STENCIL); + agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10; + + agx_ppp_push(&ppp, OUTPUT_SELECT, cfg) { + cfg.varyings = !!fs->info.varyings.fs.nr_bindings; + cfg.point_size = vs->info.writes_psiz; + cfg.frag_coord_z = fs->info.varyings.fs.reads_z; + } + + agx_ppp_push(&ppp, VARYING_0, cfg) { + cfg.count = agx_num_general_outputs(&ctx->vs->info.varyings.vs); + } + + agx_ppp_push(&ppp, VARYING_1, cfg); + agx_ppp_push_packed(&ppp, ctx->rast->cull, CULL); + agx_ppp_push(&ppp, CULL_2, cfg); + + unsigned frag_tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count; + agx_ppp_push(&ppp, FRAGMENT_SHADER, cfg) { + cfg.groups_of_8_immediate_textures = DIV_ROUND_UP(frag_tex_count, 8); + cfg.groups_of_4_samplers = DIV_ROUND_UP(frag_tex_count, 4); + cfg.more_than_4_textures = frag_tex_count >= 4; + cfg.cf_binding_count = ctx->fs->info.varyings.fs.nr_bindings; + cfg.pipeline = pipeline_fragment; + cfg.cf_bindings = varyings; + } + + agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY, cfg); + agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg); + agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg); + agx_ppp_push(&ppp, OUTPUT_SIZE, cfg) cfg.count = vs->info.varyings.vs.nr_index; + agx_ppp_push(&ppp, VARYING_2, cfg); + + agx_ppp_fini(&out, &ppp); return out; }