asahi: Match PPP data structures with PowerVR

Looking at PowerVR's PPP definitions in tree in Mesa
(src/imagination/csbgen/), we find that AGX's "tagged" data structures
are actually sequences of state items prefixed by a header specifying
which state follows. Rather than hardcoding the sequences in which Apple's
driver chooses to bundle state, we need the XML to be flexible enough to
encode or decode any valid combination of state. That means reworking
the XML. While doing so, we find a number of fields that are identical
between RGX and AGX, and fix the names while at it (for example, the W
Clamp floating point).

Names are from the PowerVR code in Mesa where sensible.

Once we've reworked the XML, we need to rework the decoder.  Instead of
reading tags and printing the combined state packets, the decoder now
must unpack the header and print the individual state items specified by
the header, with slightly more complicated bounds checking.

Finally, state emission in the driver becomes much more flexible. To
prove the flexibility actually works, we now emit all PPP state (except for
viewport and scissor state) as a single PPP update. This works. After
this we can move on to more interesting arrangements of state for lower
driver overhead.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18421>
This commit is contained in:
Alyssa Rosenzweig 2022-09-04 15:17:22 -04:00 committed by Marge Bot
parent baadc1ec13
commit 942bda7f2d
5 changed files with 460 additions and 325 deletions

141
src/asahi/lib/agx_ppp.h Normal file
View file

@ -0,0 +1,141 @@
/*
* Copyright 2022 Alyssa Rosenzweig
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef AGX_PPP_H
#define AGX_PPP_H
#include "asahi/lib/agx_pack.h"
/*
 * Opaque structure representing a PPP update that is being built. Create one
 * with agx_new_ppp_update(), append state with agx_ppp_push() or
 * agx_ppp_push_packed(), and finish it with agx_ppp_fini().
 */
struct agx_ppp_update {
/* CPU write cursor: the push macros write at this address and then advance
 * it by the length of the item they wrote.
 */
uint8_t *head;
/* GPU address of the start of the update, written into the record emitted by
 * agx_ppp_fini().
 */
uint64_t gpu_base;
/* Size in bytes of the whole update (header plus all items flagged present),
 * as computed by agx_ppp_update_size().
 */
size_t total_size;
#ifndef NDEBUG
/* Debug builds only: CPU address of the start of the update, kept so the
 * asserts can measure how many bytes have been written so far.
 */
uint8_t *cpu_base;
#endif
};
static size_t
agx_ppp_update_size(struct AGX_PPP_HEADER *present)
{
size_t size = AGX_PPP_HEADER_LENGTH;
#define PPP_CASE(x, y) if (present->x) size += AGX_ ## y ##_LENGTH;
PPP_CASE(fragment_control, FRAGMENT_CONTROL);
PPP_CASE(fragment_control_2, FRAGMENT_CONTROL_2);
PPP_CASE(fragment_front_face, FRAGMENT_FACE);
PPP_CASE(fragment_front_face_2, FRAGMENT_FACE_2);
PPP_CASE(fragment_front_stencil, FRAGMENT_STENCIL);
PPP_CASE(fragment_back_face, FRAGMENT_FACE);
PPP_CASE(fragment_back_face_2, FRAGMENT_FACE_2);
PPP_CASE(fragment_back_stencil, FRAGMENT_STENCIL);
PPP_CASE(depth_bias_scissor, DEPTH_BIAS_SCISSOR);
PPP_CASE(region_clip, REGION_CLIP);
PPP_CASE(viewport, VIEWPORT);
PPP_CASE(w_clamp, W_CLAMP);
PPP_CASE(output_select, OUTPUT_SELECT);
PPP_CASE(varying_word_0, VARYING_0);
PPP_CASE(varying_word_1, VARYING_1);
PPP_CASE(cull, CULL);
PPP_CASE(cull_2, CULL_2);
PPP_CASE(fragment_shader, FRAGMENT_SHADER);
PPP_CASE(occlusion_query, FRAGMENT_OCCLUSION_QUERY);
PPP_CASE(occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2);
PPP_CASE(output_unknown, OUTPUT_UNKNOWN);
PPP_CASE(output_size, OUTPUT_SIZE);
PPP_CASE(varying_word_2, VARYING_2);
#undef PPP_CASE
assert((size % 4) == 0 && "PPP updates are aligned");
return size;
}
/*
 * Check that `size` more bytes can be written at the current head of `ppp`
 * without running past the end of the update.
 *
 * The check is an assert and only exists in debug builds. The function always
 * returns true so it can be used as the initializer of a loop condition.
 */
static inline bool
agx_ppp_validate(struct agx_ppp_update *ppp, size_t size)
{
#ifdef NDEBUG
   /* Release builds: nothing to check. */
   (void) ppp;
   (void) size;
#else
   /* Assert that we don't overflow. Ideally we'd assert that types match too
    * but that's harder to do at the moment.
    */
   assert(((ppp->head - ppp->cpu_base) + size) <= ppp->total_size);
#endif

   return true;
}
/*
 * Append one state item of type T to a PPP update. Use it with a trailing
 * statement or block that fills in `name`:
 *
 *    agx_ppp_push(&ppp, W_CLAMP, cfg) { cfg.w_clamp = ...; }
 *
 * The outer for loop runs exactly once: agx_ppp_validate() always returns
 * true (asserting in debug builds that AGX_<T>_LENGTH bytes fit), and the
 * increment clause clears `it` and advances the head past the item. The
 * user's statement becomes the body of agx_pack(), which targets the head.
 * `ppp` is evaluated more than once.
 */
#define agx_ppp_push(ppp, T, name) \
for (bool it = agx_ppp_validate((ppp), AGX_##T##_LENGTH); it; it = false, \
(ppp)->head += AGX_##T##_LENGTH) \
agx_pack((ppp)->head, T, name)
/*
 * Append an already-packed state item of type T to a PPP update by copying
 * AGX_<T>_LENGTH bytes from `src` to the head, then advancing the head.
 * agx_ppp_validate() asserts in debug builds that the bytes fit.
 *
 * The definition must end at the `while(0)` line with no trailing backslash,
 * otherwise the next source line is spliced into the macro body. `ppp` is
 * evaluated more than once.
 */
#define agx_ppp_push_packed(ppp, src, T) do { \
   agx_ppp_validate((ppp), AGX_##T##_LENGTH); \
   memcpy((ppp)->head, (src), AGX_##T##_LENGTH); \
   (ppp)->head += AGX_##T##_LENGTH; \
} while(0)
static inline struct agx_ppp_update
agx_new_ppp_update(struct agx_pool *pool, struct AGX_PPP_HEADER present)
{
size_t size = agx_ppp_update_size(&present);
struct agx_ptr T = agx_pool_alloc_aligned(pool, size, 64);
struct agx_ppp_update ppp = {
.gpu_base = T.gpu,
.head = T.cpu,
.total_size = size,
#ifndef NDEBUG
.cpu_base = T.cpu,
#endif
};
agx_ppp_push(&ppp, PPP_HEADER, cfg) { cfg = present; }
return ppp;
}
static inline void
agx_ppp_fini(uint8_t **out, struct agx_ppp_update *ppp)
{
size_t size = ppp->total_size;
assert((size % 4) == 0);
size_t size_words = size / 4;
#ifndef NDEBUG
assert(size == (ppp->head - ppp->cpu_base) && "mismatched ppp size");
#endif
assert(ppp->gpu_base < (1ull << 40));
assert(size_words < (1ull << 24));
agx_pack(*out, RECORD, cfg) {
cfg.pointer_hi = (ppp->gpu_base >> 32);
cfg.pointer_lo = (uint32_t) ppp->gpu_base;
cfg.size_words = size_words;
};
*out += AGX_RECORD_LENGTH;
}
#endif

View file

@ -288,33 +288,6 @@
<field name="Border colour" size="2" start="55" type="Border colour"/>
</struct>
<!--- Identified by tag? -->
<struct name="Viewport" size="40">
<field name="Tag" size="32" start="0:0" type="hex" default="0xc00"/>
<!-- Acts like a scissor at 32x32 tile boundaries, ignored unless clip tile is set -->
<field name="Max tile X" size="9" start="1:0" type="uint" modifier="minus(1)"/>
<field name="Min tile X" size="9" start="1:16" type="uint"/>
<field name="Clip tile" size="1" start="1:31" type="bool"/>
<field name="Max tile Y" size="9" start="2:0" type="uint" modifier="minus(1)"/>
<field name="Min tile Y" size="9" start="2:16" type="uint"/>
<!-- Used to convert clip space coordinates to NDC, does not clip -->
<field name="Translate X" size="32" start="4:0" type="float"/>
<field name="Scale X" size="32" start="5:0" type="float"/>
<field name="Translate Y" size="32" start="6:0" type="float"/>
<field name="Scale Y" size="32" start="7:0" type="float"/>
<!-- Specifies an affine transformation from clip coordinates to viewport
depth coordinates. For APIs with clip coordinates [0, 1], this corresponds
to near z and (far z - near z) respectively. In general, given clip
coordinate z_in, the viewport depth is given as (z_in * scale_z) +
translate_z. For example, the default [0, 1] depth buffer in OpenGL with
[-1, +1] clip coordinates is specified as scale = 1/2, bias = 1/2 -->
<field name="Translate Z" size="32" start="8:0" type="float"/>
<field name="Scale Z" size="32" start="9:0" type="float"/>
</struct>
<!--- Pointed to from the command buffer -->
<struct name="Scissor" size="16">
<field name="Max X" size="16" start="0:0" type="uint"/>
@ -332,7 +305,53 @@
<field name="Clamp" size="32" start="2:0" type="float"/>
</struct>
<struct name="Fragment face">
<!-- PPP state starts -->
<struct name="PPP Header" size="4">
<field name="Fragment control" size="1" start="0" type="bool"/>
<field name="Fragment control 2" size="1" start="1" type="bool"/>
<field name="Fragment front face" size="1" start="2" type="bool"/>
<field name="Fragment front face 2" size="1" start="3" type="bool"/>
<field name="Fragment front stencil" size="1" start="4" type="bool"/>
<field name="Fragment back face" size="1" start="5" type="bool"/>
<field name="Fragment back face 2" size="1" start="6" type="bool"/>
<field name="Fragment back stencil" size="1" start="7" type="bool"/>
<field name="Depth bias/scissor" size="1" start="8" type="bool"/>
<field name="Region clip" size="1" start="10" type="bool"/>
<field name="Viewport" size="1" start="11" type="bool"/>
<field name="W clamp" size="1" start="16" type="bool"/>
<field name="Output select" size="1" start="17" type="bool"/>
<field name="Varying word 0" size="1" start="18" type="bool"/>
<field name="Varying word 1" size="1" start="19" type="bool"/>
<field name="Cull" size="1" start="21" type="bool"/>
<field name="Cull 2" size="1" start="22" type="bool"/>
<field name="Fragment shader" size="1" start="23" type="bool"/>
<field name="Occlusion query" size="1" start="24" type="bool"/>
<field name="Occlusion query 2" size="1" start="25" type="bool"/>
<field name="Output unknown" size="1" start="26" type="bool"/>
<field name="Output size" size="1" start="27" type="bool"/>
<field name="Varying word 2" size="1" start="28" type="bool"/>
</struct>
<!-- Acts like a scissor at 32x32 tile boundaries, ignored unless clip tile is set -->
<struct name="Region clip" size="12">
<field name="Max X" size="9" start="0" type="uint" modifier="minus(1)"/>
<field name="Min X" size="9" start="16" type="uint"/>
<field name="Enable" size="1" start="31" type="bool"/>
<field name="Max Y" size="9" start="32" type="uint" modifier="minus(1)"/>
<field name="Min Y" size="9" start="48" type="uint"/>
</struct>
<!-- Used to convert clip space coordinates to NDC, does not clip -->
<struct name="Viewport" size="24">
<field name="Translate X" size="32" start="0:0" type="float"/>
<field name="Scale X" size="32" start="1:0" type="float"/>
<field name="Translate Y" size="32" start="2:0" type="float"/>
<field name="Scale Y" size="32" start="3:0" type="float"/>
<field name="Translate Z" size="32" start="4:0" type="float"/>
<field name="Scale Z" size="32" start="5:0" type="float"/>
</struct>
<struct name="Fragment face" size="4">
<field name="Stencil reference" size="8" start="0" type="hex"/>
<!-- line width is 4:4 fixed point with off-by-one applied -->
<field name="Line width" size="8" start="8" type="hex"/>
@ -342,7 +361,12 @@
<field name="Depth function" size="3" start="24" type="ZS Func"/>
</struct>
<struct name="Fragment stencil" size="8">
<struct name="Fragment face 2" size="4">
<field name="Unknown" size="8" start="20" type="hex" default="0x7e"/>
<field name="Object type" size="4" start="28" type="Object Type"/>
</struct>
<struct name="Fragment stencil" size="4">
<field name="Write mask" size="8" start="0" type="hex"/>
<field name="Read mask" size="8" start="8" type="hex"/>
<field name="Depth pass" size="3" start="16" type="Stencil Op"/>
@ -351,7 +375,7 @@
<field name="Compare" size="3" start="25" type="ZS Func"/>
</struct>
<struct name="Fragment control">
<struct name="Fragment control" size="4">
<field name="Unk 1" size="1" start="9" type="hex" default="0x1"/>
<field name="Visibility mode" size="2" start="14" type="Visibility Mode"/>
<field name="Scissor enable" size="1" start="16" type="bool"/>
@ -361,71 +385,90 @@
<field name="Unk fill lines" size="1" start="26" type="hex" default="0x0"/> <!-- set when drawing LINES -->
</struct>
<struct name="Fragment occlusion query">
<struct name="Fragment control 2" size="4">
<field name="Unk 1" size="1" start="9" type="bool" default="true"/>
<!-- Or discards? -->
<field name="No colour output" size="1" start="21" type="bool"/>
<field name="Lines or points" size="1" start="26" type="bool"/>
<field name="Reads tilebuffer" size="1" start="29" type="bool"/>
<field name="Sample mask from shader" size="1" start="30" type="bool"/>
</struct>
<struct name="Fragment occlusion query" size="4">
<field name="Offset" size="18" start="14" type="uint"/>
</struct>
<struct name="Rasterizer" size="28">
<field name="Tag" size="32" start="0:0" type="hex" default="0x10000b5"/>
<field name="Common" size="32" start="1:0" type="Fragment control"/>
<field name="Front" size="32" start="2:0" type="Fragment face"/>
<field name="Front stencil" size="32" start="3:0" type="Fragment stencil"/>
<field name="Back" size="32" start="4:0" type="Fragment face"/>
<field name="Back stencil" size="32" start="5:0" type="Fragment stencil"/>
<field name="Occlusion query" size="32" start="6:0" type="Fragment occlusion query"/>
<struct name="Fragment occlusion query 2" size="4">
<field name="Unknown" size="17" start="0" type="hex"/>
</struct>
<struct name="Unknown face" size="4">
<field name="Unknown" size="8" start="0:20" type="hex" default="0x7e"/>
<field name="Object type" size="4" start="0:28" type="Object Type"/>
<struct name="W Clamp" size="4">
<field name="W Clamp" size="32" start="0" type="float"/>
</struct>
<struct name="Unknown 4a" size="20">
<field name="Tag" size="32" start="0:0" type="hex" default="0x200004a"/>
<field name="Unk 1" size="1" start="1:9" type="bool" default="true"/>
<!-- Or discards? -->
<field name="No colour output" size="1" start="1:21" type="bool"/>
<field name="Lines or points" size="1" start="1:26" type="bool"/>
<field name="Reads tilebuffer" size="1" start="1:29" type="bool"/>
<field name="Sample mask from shader" size="1" start="1:30" type="bool"/>
<field name="Front" size="32" start="2:0" type="Unknown face"/>
<field name="Back" size="32" start="3:0" type="Unknown face"/>
<!-- Guess. Zeroed when sample mask written from the shader in all CF paths -->
<field name="Sample mask" size="17" start="4:0" type="hex" default="0x1ffff"/>
<struct name="Cull" size="4">
<field name="Cull front" size="1" start="0" type="bool"/>
<field name="Cull back" size="1" start="1" type="bool"/>
<field name="Unk GL 1" size="1" start="7" type="bool"/>
<field name="Unk GL 2" size="1" start="8" type="bool"/>
<field name="Depth clip" size="1" start="10" type="bool"/>
<field name="Depth clamp" size="1" start="11" type="bool"/>
<field name="Front face CCW" size="1" start="16" type="bool"/>
</struct>
<struct name="Cull" size="8">
<field name="Tag" size="32" start="0:0" type="hex" default="0x200000"/>
<field name="Cull front" size="1" start="1:0" type="bool"/>
<field name="Cull back" size="1" start="1:1" type="bool"/>
<field name="Unk GL 1" size="1" start="1:7" type="bool"/>
<field name="Unk GL 2" size="1" start="1:8" type="bool"/>
<field name="Depth clip" size="1" start="1:10" type="bool"/>
<field name="Depth clamp" size="1" start="1:11" type="bool"/>
<field name="Front face CCW" size="1" start="1:16" type="bool"/>
<struct name="Cull 2" size="4">
<field name="Unknown 2" size="32" start="0" type="hex" default="0xa0"/>
</struct>
<struct name="Interpolation" size="20">
<field name="Tag" size="32" start="0:0" type="hex" default="0x100C0000"/>
<field name="Varying count" size="32" start="1:0" type="uint"/>
<struct name="Varying 0" size="4">
<field name="Count" size="32" start="0" type="uint"/>
</struct>
<struct name="Linkage" size="16">
<field name="Tag" size="32" start="0:0" type="hex" default="0xC020000"/>
<field name="Any varyings" size="1" start="1:16" type="bool"/>
<field name="Has point size" size="1" start="1:18" type="bool"/>
<field name="Has frag coord Z" size="1" start="1:21" type="bool"/>
<field name="Unk 2" size="32" start="2:0" type="hex" default="0x0"/>
<field name="Varying count" size="32" start="3:0" type="uint"/>
<struct name="Varying 1" size="4">
<!-- TODO -->
</struct>
<struct name="Varying 2" size="8">
<!-- TODO -->
</struct>
<struct name="Output Select" size="4">
<field name="Varyings" size="1" start="16" type="bool"/>
<field name="Point size" size="1" start="18" type="bool"/>
<field name="Frag coord Z" size="1" start="21" type="bool"/>
</struct>
<struct name="Output Unknown" size="4">
<!-- So far always zero -->
</struct>
<struct name="Output Size" size="4">
<field name="Count" size="32" start="0" type="uint"/>
</struct>
<!-- Indexes into the scissor and depth bias arrays -->
<struct name="Set index" size="8">
<field name="Tag" size="32" start="0:0" type="hex" default="0x100"/>
<field name="Scissor" size="16" start="1:0" type="uint"/>
<field name="Depth bias" size="16" start="1:16" type="uint"/>
<struct name="Depth bias/Scissor" size="4">
<field name="Scissor" size="16" start="0" type="uint"/>
<field name="Depth bias" size="16" start="16" type="uint"/>
</struct>
<struct name="Fragment shader" size="16">
<!-- 4 if more than 32 textures bound -->
<field name="Unk 1" size="4" start="0:0" type="hex" default="0x2"/>
<field name="Groups of 8 immediate textures" start="0:4" size="3" type="uint"/>
<field name="Groups of 4 samplers" start="0:9" size="3" type="uint"/>
<!-- When more than 48 textures bound, switches to 0x8, unk1 switches to
0x6, and some funny sort of bindless access(?) is used in the shader -->
<field name="Unk 2" size="4" start="0:12" type="hex" default="0x1"/>
<field name="CF binding count" size="8" start="0:16" type="uint" default="0"/>
<field name="Padding 1" size="8" start="0:24" type="hex" default="0x0"/>
<field name="Pipeline" size="32" start="1:0" type="address"/>
<field name="CF bindings" size="32" start="2:0" type="address"/>
<field name="More than 4 textures" start="3:0" size="1" type="bool"/>
</struct>
<!-- PPP state ends -->
<!--- Commands valid within a pipeline -->
<struct name="Bind uniform" size="8">
<field name="Tag" size="8" start="0:0" type="hex" default="0x1d"/>
@ -531,22 +574,6 @@
<field name="Unk 3" size="32" start="5:0" type="address"/> <!-- C020000 -->
</struct>
<struct name="Bind fragment pipeline" size="20">
<field name="Tag" size="32" start="0:0" type="hex" default="0x800000"/>
<!-- 4 if more than 32 textures bound -->
<field name="Unk 1" size="4" start="1:0" type="hex" default="0x2"/>
<field name="Groups of 8 immediate textures" start="1:4" size="3" type="uint"/>
<field name="Groups of 4 samplers" start="1:9" size="3" type="uint"/>
<!-- When more than 48 textures bound, switches to 0x8, unk1 switches to
0x6, and some funny sort of bindless access(?) is used in the shader -->
<field name="Unk 2" size="4" start="1:12" type="hex" default="0x1"/>
<field name="CF binding count" size="8" start="1:16" type="uint" default="0"/>
<field name="Padding 1" size="8" start="1:24" type="hex" default="0x0"/>
<field name="Pipeline" size="32" start="2:0" type="address"/>
<field name="CF bindings" size="32" start="3:0" type="address"/>
<field name="More than 4 textures" start="4:0" size="1" type="bool"/>
</struct>
<!-- Subcommands are packed inside sized records -->
<struct name="Record" size="8">
<field name="Pointer (hi)" size="8" start="0:0" type="hex"/>

View file

@ -253,7 +253,6 @@ agxdecode_map_read_write(void)
#define agxdecode_msg(str) fprintf(agxdecode_dump_stream, "// %s", str)
unsigned agxdecode_indent = 0;
uint64_t pipeline_base = 0;
static void
agxdecode_dump_bo(struct agx_bo *bo, const char *name)
@ -395,60 +394,70 @@ agxdecode_pipeline(const uint8_t *map, UNUSED bool verbose)
}
}
/*
 * Decode one optional PPP state item. If the unpacked header `hdr` has
 * `header_name` set, assert that a whole AGX_<struct_name>_LENGTH item lies
 * within the update, dump it with DUMP_CL under the label `human`, and
 * advance `map` past it.
 *
 * Besides its arguments, the expansion uses `hdr`, `base` and `size` from the
 * scope where it is used. It expands to a bare `if` statement, so it must not
 * be used as the body of an unbraced if/else.
 */
#define PPP_PRINT(map, header_name, struct_name, human) \
if (hdr.header_name) { \
assert(((map + AGX_##struct_name##_LENGTH) <= (base + size)) && \
"buffer overrun in PPP update"); \
DUMP_CL(struct_name, map, human); \
map += AGX_##struct_name##_LENGTH; \
}
static void
agxdecode_record(uint64_t va, size_t size, bool verbose)
{
uint8_t *map = agxdecode_fetch_gpu_mem(va, size);
uint32_t tag = 0;
memcpy(&tag, map, 4);
uint8_t *base = agxdecode_fetch_gpu_mem(va, size);
uint8_t *map = base;
if (tag == 0x00000C00) {
assert(size == AGX_VIEWPORT_LENGTH);
DUMP_CL(VIEWPORT, map, "Viewport");
} else if (tag == 0x100C0000) {
assert(size == AGX_INTERPOLATION_LENGTH);
DUMP_CL(INTERPOLATION, map, "Interpolation");
} else if (tag == 0x0C020000) {
assert(size == AGX_LINKAGE_LENGTH);
DUMP_CL(LINKAGE, map, "Linkage");
} else if (tag == 0x200004a) {
assert(size == AGX_UNKNOWN_4A_LENGTH);
DUMP_CL(UNKNOWN_4A, map, "Unknown 4a");
} else if (tag == 0x10000b5) {
assert(size == AGX_RASTERIZER_LENGTH);
DUMP_CL(RASTERIZER, map, "Rasterizer");
} else if (tag == 0x200000) {
assert(size == AGX_CULL_LENGTH);
DUMP_CL(CULL, map, "Cull");
} else if (tag == 0x000100) {
assert(size == AGX_SET_INDEX_LENGTH);
DUMP_CL(SET_INDEX, map, "Set index");
} else if (tag == 0x800000) {
assert(size == AGX_BIND_FRAGMENT_PIPELINE_LENGTH);
agx_unpack(agxdecode_dump_stream, map, PPP_HEADER, hdr);
map += AGX_PPP_HEADER_LENGTH;
agx_unpack(agxdecode_dump_stream, map, BIND_FRAGMENT_PIPELINE, cmd);
agxdecode_stateful(cmd.pipeline, "Pipeline", agxdecode_pipeline, verbose);
PPP_PRINT(map, fragment_control, FRAGMENT_CONTROL, "Fragment control");
PPP_PRINT(map, fragment_control_2, FRAGMENT_CONTROL_2, "Fragment control 2");
PPP_PRINT(map, fragment_front_face, FRAGMENT_FACE, "Front face");
PPP_PRINT(map, fragment_front_face_2, FRAGMENT_FACE_2, "Front face 2");
PPP_PRINT(map, fragment_front_stencil, FRAGMENT_STENCIL, "Front stencil");
PPP_PRINT(map, fragment_back_face, FRAGMENT_FACE, "Back face");
PPP_PRINT(map, fragment_back_face_2, FRAGMENT_FACE_2, "Back face 2");
PPP_PRINT(map, fragment_back_stencil, FRAGMENT_STENCIL, "Back stencil");
PPP_PRINT(map, depth_bias_scissor, DEPTH_BIAS_SCISSOR, "Depth bias/scissor");
PPP_PRINT(map, region_clip, REGION_CLIP, "Region clip");
PPP_PRINT(map, viewport, VIEWPORT, "Viewport");
PPP_PRINT(map, w_clamp, W_CLAMP, "W clamp");
PPP_PRINT(map, output_select, OUTPUT_SELECT, "Output select");
PPP_PRINT(map, varying_word_0, VARYING_0, "Varying word 0");
PPP_PRINT(map, varying_word_1, VARYING_1, "Varying word 1");
PPP_PRINT(map, cull, CULL, "Cull");
PPP_PRINT(map, cull_2, CULL_2, "Cull 2");
if (cmd.cf_bindings) {
uint8_t *map = agxdecode_fetch_gpu_mem(cmd.cf_bindings, 128);
hexdump(agxdecode_dump_stream, map, 128, false);
if (hdr.fragment_shader) {
agx_unpack(agxdecode_dump_stream, map, FRAGMENT_SHADER, frag);
agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_pipeline, verbose);
DUMP_CL(CF_BINDING_HEADER, map, "Coefficient binding header:");
map += AGX_CF_BINDING_HEADER_LENGTH;
if (frag.cf_bindings) {
uint8_t *cf = agxdecode_fetch_gpu_mem(frag.cf_bindings, 128);
hexdump(agxdecode_dump_stream, cf, 128, false);
for (unsigned i = 0; i < cmd.cf_binding_count; ++i) {
DUMP_CL(CF_BINDING, map, "Coefficient binding:");
map += AGX_CF_BINDING_LENGTH;
DUMP_CL(CF_BINDING_HEADER, cf, "Coefficient binding header:");
cf += AGX_CF_BINDING_HEADER_LENGTH;
for (unsigned i = 0; i < frag.cf_binding_count; ++i) {
DUMP_CL(CF_BINDING, cf, "Coefficient binding:");
cf += AGX_CF_BINDING_LENGTH;
}
}
DUMP_UNPACKED(BIND_FRAGMENT_PIPELINE, cmd, "Bind fragment pipeline\n");
} else if (size == 0) {
pipeline_base = va;
} else {
fprintf(agxdecode_dump_stream, "Record %" PRIx64 "\n", va);
hexdump(agxdecode_dump_stream, map, size, false);
DUMP_UNPACKED(FRAGMENT_SHADER, frag, "Fragment shader\n");
map += AGX_FRAGMENT_SHADER_LENGTH;
}
PPP_PRINT(map, occlusion_query, FRAGMENT_OCCLUSION_QUERY, "Occlusion query");
PPP_PRINT(map, occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2, "Occlusion query 2");
PPP_PRINT(map, output_unknown, OUTPUT_UNKNOWN, "Output unknown");
PPP_PRINT(map, output_size, OUTPUT_SIZE, "Output size");
PPP_PRINT(map, varying_word_2, VARYING_2, "Varying word 2");
/* PPP print checks we don't read too much, now check we read enough */
assert(map == (base + size) && "invalid size of PPP update");
}
static unsigned

View file

@ -24,6 +24,7 @@ dep_iokit = dependency('IOKit', required : false)
libasahi_lib_files = files(
'agx_device.c',
'agx_formats.c',
'agx_ppp.h',
'pool.c',
)

View file

@ -43,6 +43,7 @@
#include "agx_state.h"
#include "asahi/lib/agx_pack.h"
#include "asahi/lib/agx_formats.h"
#include "asahi/lib/agx_ppp.h"
static struct pipe_stream_output_target *
agx_create_stream_output_target(struct pipe_context *pctx,
@ -628,19 +629,14 @@ agx_set_viewport_states(struct pipe_context *pctx,
ctx->viewport = *vp;
}
struct agx_viewport_scissor {
uint64_t viewport;
unsigned scissor;
};
static struct agx_viewport_scissor
static void
agx_upload_viewport_scissor(struct agx_pool *pool,
struct agx_batch *batch,
uint8_t **out,
const struct pipe_viewport_state *vp,
const struct pipe_scissor_state *ss)
const struct pipe_scissor_state *ss,
unsigned zbias)
{
struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_VIEWPORT_LENGTH, 64);
float trans_x = vp->translate[0], trans_y = vp->translate[1];
float abs_scale_x = fabsf(vp->scale[0]), abs_scale_y = fabsf(vp->scale[1]);
@ -665,21 +661,6 @@ agx_upload_viewport_scissor(struct agx_pool *pool,
float minz, maxz;
util_viewport_zmin_zmax(vp, false, &minz, &maxz);
agx_pack(T.cpu, VIEWPORT, cfg) {
cfg.min_tile_x = minx / 32;
cfg.min_tile_y = miny / 32;
cfg.max_tile_x = DIV_ROUND_UP(maxx, 32);
cfg.max_tile_y = DIV_ROUND_UP(maxy, 32);
cfg.clip_tile = true;
cfg.translate_x = vp->translate[0];
cfg.translate_y = vp->translate[1];
cfg.translate_z = vp->translate[2];
cfg.scale_x = vp->scale[0];
cfg.scale_y = vp->scale[1];
cfg.scale_z = vp->scale[2];
}
/* Allocate a new scissor descriptor */
struct agx_scissor_packed *ptr = batch->scissor.bo->ptr.cpu;
unsigned index = (batch->scissor.count++);
@ -693,10 +674,36 @@ agx_upload_viewport_scissor(struct agx_pool *pool,
cfg.max_z = maxz;
}
return (struct agx_viewport_scissor) {
.viewport = T.gpu,
.scissor = index
/* Upload state */
struct agx_ppp_update ppp = agx_new_ppp_update(pool, (struct AGX_PPP_HEADER) {
.depth_bias_scissor = true,
.region_clip = true,
.viewport = true,
});
agx_ppp_push(&ppp, DEPTH_BIAS_SCISSOR, cfg) {
cfg.scissor = index;
cfg.depth_bias = zbias;
};
agx_ppp_push(&ppp, REGION_CLIP, cfg) {
cfg.enable = true;
cfg.min_x = minx / 32;
cfg.min_y = miny / 32;
cfg.max_x = DIV_ROUND_UP(maxx, 32);
cfg.max_y = DIV_ROUND_UP(maxy, 32);
}
agx_ppp_push(&ppp, VIEWPORT, cfg) {
cfg.translate_x = vp->translate[0];
cfg.translate_y = vp->translate[1];
cfg.translate_z = vp->translate[2];
cfg.scale_x = vp->scale[0];
cfg.scale_y = vp->scale[1];
cfg.scale_z = vp->scale[2];
}
agx_ppp_fini(out, &ppp);
}
static uint16_t
@ -1441,90 +1448,6 @@ agx_build_store_pipeline(struct agx_context *ctx, uint32_t code,
return ptr.gpu;
}
static uint64_t
demo_launch_fragment(struct agx_context *ctx, struct agx_pool *pool, uint32_t pipeline, uint32_t varyings, unsigned input_count)
{
struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_BIND_FRAGMENT_PIPELINE_LENGTH, 64);
unsigned tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count;
agx_pack(t.cpu, BIND_FRAGMENT_PIPELINE, cfg) {
cfg.groups_of_8_immediate_textures = DIV_ROUND_UP(tex_count, 8);
cfg.groups_of_4_samplers = DIV_ROUND_UP(tex_count, 4);
cfg.more_than_4_textures = tex_count >= 4;
cfg.cf_binding_count = input_count;
cfg.pipeline = pipeline;
cfg.cf_bindings = varyings;
};
return t.gpu;
}
static uint64_t
demo_interpolation(struct agx_varyings_vs *vs, struct agx_pool *pool)
{
struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_INTERPOLATION_LENGTH, 64);
agx_pack(t.cpu, INTERPOLATION, cfg) {
cfg.varying_count = agx_num_general_outputs(vs);
};
return t.gpu;
}
static uint64_t
demo_linkage(struct agx_compiled_shader *vs, struct agx_compiled_shader *fs, struct agx_pool *pool)
{
struct agx_ptr t = agx_pool_alloc_aligned(pool, AGX_LINKAGE_LENGTH, 64);
agx_pack(t.cpu, LINKAGE, cfg) {
cfg.varying_count = vs->info.varyings.vs.nr_index;
cfg.any_varyings = !!fs->info.varyings.fs.nr_bindings;
cfg.has_point_size = vs->info.writes_psiz;
cfg.has_frag_coord_z = fs->info.varyings.fs.reads_z;
};
return t.gpu;
}
/*
 * Build a RASTERIZER descriptor from the context's rasterizer, depth/stencil
 * and stencil-reference state, upload it to `pool` with 64-byte alignment and
 * return the result of agx_pool_upload_aligned().
 *
 * Fields owned by this function are packed first. The pre-packed depth and
 * stencil words stored in ctx->zs are then OR'd into the matching words of
 * the packed descriptor, so this relies on the two sets of bits not
 * overlapping.
 */
static uint64_t
demo_rasterizer(struct agx_context *ctx, struct agx_pool *pool, bool is_points)
{
struct agx_rasterizer *rast = ctx->rast;
struct agx_rasterizer_packed out;
agx_pack(&out, RASTERIZER, cfg) {
cfg.common.stencil_test_enable = ctx->zs->base.stencil[0].enabled;
cfg.common.two_sided_stencil = ctx->zs->base.stencil[1].enabled;
cfg.front.stencil_reference = ctx->stencil_ref.ref_value[0];
/* Without two-sided stencil, the back face reuses the front reference */
cfg.back.stencil_reference = cfg.common.two_sided_stencil ?
ctx->stencil_ref.ref_value[1] :
cfg.front.stencil_reference;
cfg.front.line_width = cfg.back.line_width = rast->line_width;
cfg.front.polygon_mode = cfg.back.polygon_mode = AGX_POLYGON_MODE_FILL;
cfg.common.unk_fill_lines = is_points; /* XXX: what is this? */
/* Always enable scissoring so we may scissor to the viewport (TODO:
* optimize this out if the viewport is the default and the app does not
* use the scissor test) */
cfg.common.scissor_enable = true;
cfg.common.depth_bias_enable = rast->base.offset_tri;
};
/* Words 2-3: front */
out.opaque[2] |= ctx->zs->depth.opaque[0];
out.opaque[3] |= ctx->zs->front_stencil.opaque[0];
/* Words 4-5: back */
out.opaque[4] |= ctx->zs->depth.opaque[0];
out.opaque[5] |= ctx->zs->back_stencil.opaque[0];
return agx_pool_upload_aligned(pool, &out, sizeof(out), 64);
}
static enum agx_object_type
agx_point_object_type(struct agx_rasterizer *rast)
{
@ -1533,73 +1456,13 @@ agx_point_object_type(struct agx_rasterizer *rast)
AGX_OBJECT_TYPE_POINT_SPRITE_UV10;
}
static uint64_t
demo_unk11(struct agx_pool *pool, struct agx_rasterizer *rast,
bool prim_lines, bool prim_points, bool reads_tib,
bool sample_mask_from_shader, bool no_colour_output)
{
struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_UNKNOWN_4A_LENGTH, 64);
agx_pack(T.cpu, UNKNOWN_4A, cfg) {
cfg.no_colour_output = no_colour_output;
cfg.lines_or_points = (prim_lines || prim_points);
cfg.reads_tilebuffer = reads_tib;
cfg.sample_mask_from_shader = sample_mask_from_shader;
cfg.front.object_type = cfg.back.object_type =
prim_points ? agx_point_object_type(rast) :
prim_lines ? AGX_OBJECT_TYPE_LINE :
AGX_OBJECT_TYPE_TRIANGLE;
};
return T.gpu;
}
/*
 * Upload three fixed 32-bit words to `pool` and return the result of
 * agx_pool_upload(). The meaning of the words is not established here.
 */
static uint64_t
demo_unk12(struct agx_pool *pool)
{
   uint32_t words[] = { 0x410000, 0x1e3ce508, 0xa0 };

   return agx_pool_upload(pool, words, sizeof(words));
}
static uint64_t
agx_set_index(struct agx_pool *pool, uint16_t scissor, uint16_t zbias)
{
struct agx_ptr T = agx_pool_alloc_aligned(pool, AGX_SET_INDEX_LENGTH, 64);
agx_pack(T.cpu, SET_INDEX, cfg) {
cfg.scissor = scissor;
cfg.depth_bias = zbias;
};
return T.gpu;
}
static void
agx_push_record(uint8_t **out, unsigned size_words, uint64_t ptr)
{
assert(ptr < (1ull << 40));
assert(size_words < (1ull << 24));
agx_pack(*out, RECORD, cfg) {
cfg.pointer_hi = (ptr >> 32);
cfg.pointer_lo = (uint32_t) ptr;
cfg.size_words = size_words;
};
*out += AGX_RECORD_LENGTH;
}
static uint8_t *
agx_encode_state(struct agx_context *ctx, uint8_t *out,
uint32_t pipeline_vertex, uint32_t pipeline_fragment, uint32_t varyings,
bool is_lines, bool is_points)
{
struct agx_rasterizer *rast = ctx->rast;
unsigned tex_count = ctx->stage[PIPE_SHADER_VERTEX].texture_count;
agx_pack(out, BIND_VERTEX_PIPELINE, cfg) {
cfg.pipeline = pipeline_vertex;
@ -1614,18 +1477,11 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
out += AGX_BIND_VERTEX_PIPELINE_LENGTH;
struct agx_pool *pool = &ctx->batch->pool;
struct agx_compiled_shader *vs = ctx->vs, *fs = ctx->fs;
bool reads_tib = ctx->fs->info.reads_tib;
bool sample_mask_from_shader = ctx->fs->info.writes_sample_mask;
bool no_colour_output = ctx->fs->info.no_colour_output;
agx_push_record(&out, 5, demo_interpolation(&ctx->vs->info.varyings.vs, pool));
agx_push_record(&out, 5, demo_launch_fragment(ctx, pool, pipeline_fragment,
varyings, ctx->fs->info.varyings.fs.nr_bindings));
agx_push_record(&out, 4, demo_linkage(ctx->vs, ctx->fs, pool));
agx_push_record(&out, 7, demo_rasterizer(ctx, pool, is_points));
agx_push_record(&out, 5, demo_unk11(pool, ctx->rast, is_lines, is_points, reads_tib,
sample_mask_from_shader, no_colour_output));
unsigned zbias = 0;
if (ctx->rast->base.offset_tri) {
@ -1634,16 +1490,117 @@ agx_encode_state(struct agx_context *ctx, uint8_t *out,
}
if (ctx->dirty & (AGX_DIRTY_VIEWPORT | AGX_DIRTY_SCISSOR_ZBIAS)) {
struct agx_viewport_scissor vps = agx_upload_viewport_scissor(pool,
ctx->batch, &ctx->viewport,
ctx->rast->base.scissor ? &ctx->scissor : NULL);
agx_push_record(&out, 10, vps.viewport);
agx_push_record(&out, 2, agx_set_index(pool, vps.scissor, zbias));
agx_upload_viewport_scissor(pool, ctx->batch, &out, &ctx->viewport,
ctx->rast->base.scissor ? &ctx->scissor : NULL,
zbias);
}
agx_push_record(&out, 3, demo_unk12(pool));
agx_push_record(&out, 2, agx_pool_upload(pool, ctx->rast->cull, sizeof(ctx->rast->cull)));
enum agx_object_type object_type =
is_points ? agx_point_object_type(rast) :
is_lines ? AGX_OBJECT_TYPE_LINE :
AGX_OBJECT_TYPE_TRIANGLE;
/* For now, we re-emit almost all state every draw. TODO: perf */
struct agx_ppp_update ppp = agx_new_ppp_update(pool, (struct AGX_PPP_HEADER) {
.fragment_control = true,
.fragment_control_2 = true,
.fragment_front_face = true,
.fragment_front_face_2 = true,
.fragment_front_stencil = true,
.fragment_back_face = true,
.fragment_back_face_2 = true,
.fragment_back_stencil = true,
.w_clamp = true,
.output_select = true,
.varying_word_0 = true,
.varying_word_1 = true,
.cull = true,
.cull_2 = true,
.fragment_shader = true,
.occlusion_query = true,
.occlusion_query_2 = true,
.output_unknown = true,
.output_size = true,
.varying_word_2 = true,
});
agx_ppp_push(&ppp, FRAGMENT_CONTROL, cfg) {
cfg.stencil_test_enable = ctx->zs->base.stencil[0].enabled;
cfg.two_sided_stencil = ctx->zs->base.stencil[1].enabled;
cfg.depth_bias_enable = rast->base.offset_tri;
cfg.unk_fill_lines = is_points; /* XXX: what is this? */
/* Always enable scissoring so we may scissor to the viewport (TODO:
* optimize this out if the viewport is the default and the app does not
* use the scissor test) */
cfg.scissor_enable = true;
};
agx_ppp_push(&ppp, FRAGMENT_CONTROL_2, cfg) {
cfg.no_colour_output = no_colour_output;
cfg.lines_or_points = (is_lines || is_points);
cfg.reads_tilebuffer = reads_tib;
cfg.sample_mask_from_shader = sample_mask_from_shader;
};
struct agx_fragment_face_packed front_face, back_face;
agx_pack(&front_face, FRAGMENT_FACE, cfg) {
cfg.stencil_reference = ctx->stencil_ref.ref_value[0];
cfg.line_width = rast->line_width;
cfg.polygon_mode = AGX_POLYGON_MODE_FILL;
};
front_face.opaque[0] |= ctx->zs->depth.opaque[0];
agx_ppp_push_packed(&ppp, &front_face, FRAGMENT_FACE);
agx_ppp_push(&ppp, FRAGMENT_FACE_2, cfg) cfg.object_type = object_type;
agx_ppp_push_packed(&ppp, ctx->zs->front_stencil.opaque, FRAGMENT_STENCIL);
agx_pack(&back_face, FRAGMENT_FACE, cfg) {
bool twosided = ctx->zs->base.stencil[1].enabled;
cfg.stencil_reference = ctx->stencil_ref.ref_value[twosided ? 1 : 0];
cfg.line_width = rast->line_width;
cfg.polygon_mode = AGX_POLYGON_MODE_FILL;
};
back_face.opaque[0] |= ctx->zs->depth.opaque[0];
agx_ppp_push_packed(&ppp, &back_face, FRAGMENT_FACE);
agx_ppp_push(&ppp, FRAGMENT_FACE_2, cfg) cfg.object_type = object_type;
agx_ppp_push_packed(&ppp, ctx->zs->back_stencil.opaque, FRAGMENT_STENCIL);
agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10;
agx_ppp_push(&ppp, OUTPUT_SELECT, cfg) {
cfg.varyings = !!fs->info.varyings.fs.nr_bindings;
cfg.point_size = vs->info.writes_psiz;
cfg.frag_coord_z = fs->info.varyings.fs.reads_z;
}
agx_ppp_push(&ppp, VARYING_0, cfg) {
cfg.count = agx_num_general_outputs(&ctx->vs->info.varyings.vs);
}
agx_ppp_push(&ppp, VARYING_1, cfg);
agx_ppp_push_packed(&ppp, ctx->rast->cull, CULL);
agx_ppp_push(&ppp, CULL_2, cfg);
unsigned frag_tex_count = ctx->stage[PIPE_SHADER_FRAGMENT].texture_count;
agx_ppp_push(&ppp, FRAGMENT_SHADER, cfg) {
cfg.groups_of_8_immediate_textures = DIV_ROUND_UP(frag_tex_count, 8);
cfg.groups_of_4_samplers = DIV_ROUND_UP(frag_tex_count, 4);
cfg.more_than_4_textures = frag_tex_count >= 4;
cfg.cf_binding_count = ctx->fs->info.varyings.fs.nr_bindings;
cfg.pipeline = pipeline_fragment;
cfg.cf_bindings = varyings;
}
agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY, cfg);
agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg);
agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg);
agx_ppp_push(&ppp, OUTPUT_SIZE, cfg) cfg.count = vs->info.varyings.vs.nr_index;
agx_ppp_push(&ppp, VARYING_2, cfg);
agx_ppp_fini(&out, &ppp);
return out;
}