freedreno/a6xx: add OUT_PKT()

Similar to OUT_REG(), this has the benefits of:

1. No more messing up pkt size
2. Detects errors of mixing up the order of dwords in the packet
3. Optimizes to more efficient code

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4813>
This commit is contained in:
Rob Clark 2020-04-29 09:58:38 -07:00 committed by Marge Bot
parent a142bb8992
commit ee293160d7
3 changed files with 97 additions and 5 deletions

View file

@ -623,6 +623,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<reg32 offset="2" name="2">
<bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/>
</reg32>
<reg64 offset="1" name="EXT_SRC_ADDR" type="address"/>
</domain>
<bitset name="vgt_draw_initiator" inline="yes">
@ -705,13 +706,14 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<reg32 offset="5" name="5">
<bitfield name="INDX_BASE_HI" low="0" high="31"/>
</reg32>
<reg64 offset="4" name="INDX_BASE" type="address"/>
<reg32 offset="6" name="6">
<bitfield name="INDX_SIZE" low="0" high="31"/>
</reg32>
</stripe>
<reg32 offset="4" name="4">
<bitfield name="INDX_BASE" low="0" high="31"/>
<bitfield name="INDX_BASE" low="0" high="31" type="address"/>
</reg32>
<reg32 offset="5" name="5">
@ -721,13 +723,19 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<domain name="CP_DRAW_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-">
	<reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/>
	<!-- A4XX variant: INDIRECT is a single 32-bit dword at offset 1 -->
	<stripe variants="A4XX">
		<reg32 offset="1" name="1">
			<bitfield name="INDIRECT" low="0" high="31"/>
		</reg32>
	</stripe>
	<!--
		A5XX- variants: INDIRECT is split into LO/HI dwords at offsets 1
		and 2, and is additionally described as one 64-bit address
		register at offset 1.
	-->
	<stripe variants="A5XX-">
		<reg32 offset="1" name="1">
			<bitfield name="INDIRECT_LO" low="0" high="31"/>
		</reg32>
		<reg32 offset="2" name="2">
			<bitfield name="INDIRECT_HI" low="0" high="31"/>
		</reg32>
		<reg64 offset="1" name="INDIRECT" type="address"/>
	</stripe>
</domain>
@ -752,6 +760,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<reg32 offset="2" name="2">
<bitfield name="INDX_BASE_HI" low="0" high="31"/>
</reg32>
<reg64 offset="1" name="INDX_BASE" type="address"/>
<reg32 offset="3" name="3">
<!-- max # of elements in index buffer -->
<bitfield name="MAX_INDICES" low="0" high="31" type="uint"/>
@ -762,6 +771,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<reg32 offset="5" name="5">
<bitfield name="INDIRECT_HI" low="0" high="31"/>
</reg32>
<reg64 offset="4" name="INDIRECT" type="address"/>
</stripe>
</domain>

View file

@ -47,3 +47,10 @@ freedreno_xml_header_files += custom_target(
command : [prog_python, '@INPUT@', '--pack-structs'],
capture : true,
)
# Generate adreno-pm4-pack.xml.h by running gen_header.py on adreno_pm4.xml
# with --pack-structs.  '@INPUT@' expands to both inputs (script first, then
# the XML), and 'capture : true' stores the command's stdout as the output
# file.  The resulting target is appended to freedreno_xml_header_files.
freedreno_xml_header_files += custom_target(
'adreno-pm4-pack.xml.h',
input : ['gen_header.py', 'adreno_pm4.xml'],
output : 'adreno-pm4-pack.xml.h',
command : [prog_python, '@INPUT@', '--pack-structs'],
capture : true,
)

View file

@ -39,6 +39,7 @@ struct fd_reg_pair {
#define __bo_type struct fd_bo *
#include "a6xx-pack.xml.h"
#include "adreno-pm4-pack.xml.h"
#define __assert_eq(a, b) \
do { \
@ -51,7 +52,8 @@ struct fd_reg_pair {
#define __ONE_REG(i, ...) \
do { \
const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \
if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) { \
/* NOTE: allow regs[0].reg==0, this happens in OUT_PKT() */ \
if (i < ARRAY_SIZE(regs) && (i == 0 || regs[i].reg > 0)) { \
__assert_eq(regs[0].reg + i, regs[i].reg); \
if (regs[i].bo) { \
struct fd_reloc reloc = { \
@ -109,4 +111,77 @@ struct fd_reg_pair {
ring->cur = p; \
} while (0)
/* Emit a type-7 packet for 'opcode' into 'ring'.
 *
 * The variadic arguments initialize an array of struct fd_reg_pair (at
 * most 16 entries, checked at compile time).  The header dword combines
 * CP_TYPE7_PKT, the entry count with its _odd_parity_bit() at bit 15, the
 * low 7 bits of the opcode at bit 16, and the opcode's _odd_parity_bit()
 * at bit 23.  Every entry is then passed through __ONE_REG(), and
 * ring->cur is updated to point past what was written.
 *
 * 'ring' and 'opcode' are parenthesized where they meet other operators,
 * so arguments such as '&ring_obj' or 'a | b' expand as intended.  Both
 * are still evaluated more than once, and the expansion declares the
 * locals 'regs', 'count' and 'p': arguments must be free of side effects
 * and must not refer to variables with those names.
 */
#define OUT_PKT(ring, opcode, ...) \
	do { \
		const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \
		unsigned count = ARRAY_SIZE(regs); \
		\
		/* assert on the array size itself so the condition is a */ \
		/* compile-time constant rather than a runtime variable  */ \
		STATIC_ASSERT(ARRAY_SIZE(regs) <= 16); \
		\
		BEGIN_RING(ring, count + 1); \
		uint32_t *p = (ring)->cur; \
		*p++ = CP_TYPE7_PKT | count | \
			(_odd_parity_bit(count) << 15) | \
			(((opcode) & 0x7f) << 16) | \
			(_odd_parity_bit(opcode) << 23); \
		\
		__ONE_REG( 0, __VA_ARGS__); \
		__ONE_REG( 1, __VA_ARGS__); \
		__ONE_REG( 2, __VA_ARGS__); \
		__ONE_REG( 3, __VA_ARGS__); \
		__ONE_REG( 4, __VA_ARGS__); \
		__ONE_REG( 5, __VA_ARGS__); \
		__ONE_REG( 6, __VA_ARGS__); \
		__ONE_REG( 7, __VA_ARGS__); \
		__ONE_REG( 8, __VA_ARGS__); \
		__ONE_REG( 9, __VA_ARGS__); \
		__ONE_REG(10, __VA_ARGS__); \
		__ONE_REG(11, __VA_ARGS__); \
		__ONE_REG(12, __VA_ARGS__); \
		__ONE_REG(13, __VA_ARGS__); \
		__ONE_REG(14, __VA_ARGS__); \
		__ONE_REG(15, __VA_ARGS__); \
		(ring)->cur = p; \
	} while (0)
/* Like OUT_PKT(), but additionally appends 'sizedwords' dwords copied
 * from the buffer 'dwords' after the fd_reg_pair entries (i.e. for
 * use-cases like CP_LOAD_STATE).
 *
 * The variadic arguments initialize an array of struct fd_reg_pair (at
 * most 16 entries, checked at compile time).  The count placed in the
 * packet header is the number of entries plus 'sizedwords'.  After the
 * entries have been passed through __ONE_REG(), 4 * sizedwords bytes are
 * memcpy()'d from 'dwords' and ring->cur is updated to point past them.
 *
 * Arguments are parenthesized where they meet other operators, so that
 * e.g. a 'sizedwords' of 'a + b' yields the right byte count.  They are
 * still evaluated more than once, and the expansion declares the locals
 * 'regs', 'count' and 'p': arguments must be free of side effects and
 * must not refer to variables with those names.
 */
#define OUT_PKTBUF(ring, opcode, dwords, sizedwords, ...) \
	do { \
		const struct fd_reg_pair regs[] = { __VA_ARGS__ }; \
		unsigned count = ARRAY_SIZE(regs); \
		\
		/* assert on the array size itself so the condition is a */ \
		/* compile-time constant rather than a runtime variable  */ \
		STATIC_ASSERT(ARRAY_SIZE(regs) <= 16); \
		count += (sizedwords); \
		\
		BEGIN_RING(ring, count + 1); \
		uint32_t *p = (ring)->cur; \
		*p++ = CP_TYPE7_PKT | count | \
			(_odd_parity_bit(count) << 15) | \
			(((opcode) & 0x7f) << 16) | \
			(_odd_parity_bit(opcode) << 23); \
		\
		__ONE_REG( 0, __VA_ARGS__); \
		__ONE_REG( 1, __VA_ARGS__); \
		__ONE_REG( 2, __VA_ARGS__); \
		__ONE_REG( 3, __VA_ARGS__); \
		__ONE_REG( 4, __VA_ARGS__); \
		__ONE_REG( 5, __VA_ARGS__); \
		__ONE_REG( 6, __VA_ARGS__); \
		__ONE_REG( 7, __VA_ARGS__); \
		__ONE_REG( 8, __VA_ARGS__); \
		__ONE_REG( 9, __VA_ARGS__); \
		__ONE_REG(10, __VA_ARGS__); \
		__ONE_REG(11, __VA_ARGS__); \
		__ONE_REG(12, __VA_ARGS__); \
		__ONE_REG(13, __VA_ARGS__); \
		__ONE_REG(14, __VA_ARGS__); \
		__ONE_REG(15, __VA_ARGS__); \
		memcpy(p, (dwords), 4 * (sizedwords)); \
		p += (sizedwords); \
		(ring)->cur = p; \
	} while (0)
#endif /* FD6_PACK_H */