ir3: Add cat5/cat7 cache related instructions

- tcinv - Likely Texture Cache Invalidate (unverified)
- icinv - Mostly sure that it is Instruction Cache Invalidate
- dccln - Data Cache Clean
- dcinv - Data Cache Invalidate
- dcflu - Data Cache Flush

The emission of these instructions was not observed in the wild.

TODO: find out the difference between .shr and .all modes of
      dccln, dcinv, dcflu.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14419>
This commit is contained in:
Danylo Piliaiev 2022-01-06 16:14:39 +02:00 committed by Marge Bot
parent 42dba8ebc5
commit 121e4ca87d
8 changed files with 107 additions and 11 deletions

View file

@ -316,6 +316,7 @@ static const struct opc_info {
OPC(5, OPC_QUAD_SHUFFLE_HORIZ, quad_shuffle.horiz),
OPC(5, OPC_QUAD_SHUFFLE_VERT, quad_shuffle.vert),
OPC(5, OPC_QUAD_SHUFFLE_DIAG, quad_shuffle.diag),
OPC(5, OPC_TCINV, tcinv),
/* macros are needed here for ir3_print */
OPC(5, OPC_DSXPP_MACRO, dsxpp.macro),
OPC(5, OPC_DSYPP_MACRO, dsypp.macro),

View file

@ -263,6 +263,7 @@ typedef enum {
OPC_QUAD_SHUFFLE_HORIZ = _OPC(5, 30),
OPC_QUAD_SHUFFLE_VERT = _OPC(5, 31),
OPC_QUAD_SHUFFLE_DIAG = _OPC(5, 32),
OPC_TCINV = _OPC(5, 33),
/* cat5 meta instructions, placed above the cat5 opc field's size */
OPC_DSXPP_MACRO = _OPC(5, 35),
OPC_DSYPP_MACRO = _OPC(5, 36),
@ -360,6 +361,10 @@ typedef enum {
/* category 7: */
OPC_BAR = _OPC(7, 0),
OPC_FENCE = _OPC(7, 1),
OPC_ICINV = _OPC(7, 3),
OPC_DCCLN = _OPC(7, 4),
OPC_DCINV = _OPC(7, 5),
OPC_DCFLU = _OPC(7, 6),
/* meta instructions (category -1): */
/* placeholder instr to mark shader inputs: */

View file

@ -956,7 +956,7 @@ is_sfu(struct ir3_instruction *instr)
/**
 * Report whether an instruction is a cat5 texture instruction.
 *
 * Returns true when the opcode belongs to category 5 and is not OPC_TCINV.
 * tcinv lives in the cat5 opcode space but is parsed and displayed without
 * any operands (it is believed to be a texture cache invalidate, unverified),
 * so it is deliberately excluded from the set of texture instructions.
 */
static inline bool
is_tex(struct ir3_instruction *instr)
{
   /* A single return: an earlier unconditional cat5-only return would make
    * the OPC_TCINV exclusion unreachable.
    */
   return (opc_cat(instr->opc) == 5) && instr->opc != OPC_TCINV;
}
static inline bool

View file

@ -302,6 +302,7 @@ static int parse_reg(const char *str)
"quad_shuffle.horiz" return TOKEN(T_OP_QSHUFFLE_H);
"quad_shuffle.vert" return TOKEN(T_OP_QSHUFFLE_V);
"quad_shuffle.diag" return TOKEN(T_OP_QSHUFFLE_DIAG);
"tcinv" return TOKEN(T_OP_TCINV);
/* category 6: */
"ldg" return TOKEN(T_OP_LDG);
@ -381,6 +382,10 @@ static int parse_reg(const char *str)
/* category 7: */
"bar" return TOKEN(T_OP_BAR);
"fence" return TOKEN(T_OP_FENCE);
"icinv" return TOKEN(T_OP_ICINV);
"dccln.all" return TOKEN(T_OP_DCCLN);
"dcinv.all" return TOKEN(T_OP_DCINV);
"dcflu.all" return TOKEN(T_OP_DCFLU);
"f16" return TOKEN(T_TYPE_F16);
"f32" return TOKEN(T_TYPE_F32);

View file

@ -542,6 +542,7 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_QSHUFFLE_H
%token <tok> T_OP_QSHUFFLE_V
%token <tok> T_OP_QSHUFFLE_DIAG
%token <tok> T_OP_TCINV
/* category 6: */
%token <tok> T_OP_LDG
@ -620,6 +621,10 @@ static void print_token(FILE *file, int type, YYSTYPE value)
/* category 7: */
%token <tok> T_OP_BAR
%token <tok> T_OP_FENCE
%token <tok> T_OP_ICINV
%token <tok> T_OP_DCCLN
%token <tok> T_OP_DCINV
%token <tok> T_OP_DCFLU
%token <u64> T_RAW
@ -1093,6 +1098,7 @@ cat5_instr: cat5_opc_dsxypp cat5_flags dst_reg ',' src_reg
| cat5_opc cat5_flags cat5_type dst_reg ',' cat5_samp
| cat5_opc cat5_flags cat5_type dst_reg ',' cat5_tex
| cat5_opc cat5_flags cat5_type dst_reg
| T_OP_TCINV { new_instr(OPC_TCINV); }
cat6_typed: '.' T_UNTYPED { instr->cat6.typed = 0; }
| '.' T_TYPED { instr->cat6.typed = 1; }
@ -1283,7 +1289,13 @@ cat7_scopes:
cat7_barrier: T_OP_BAR { new_instr(OPC_BAR); } cat7_scopes
| T_OP_FENCE { new_instr(OPC_FENCE); } cat7_scopes
cat7_data_cache: T_OP_DCCLN { new_instr(OPC_DCCLN); }
| T_OP_DCINV { new_instr(OPC_DCINV); }
| T_OP_DCFLU { new_instr(OPC_DCFLU); }
cat7_instr: cat7_barrier
| cat7_data_cache
| T_OP_ICINV { new_instr(OPC_ICINV); }
raw_instr: T_RAW {new_instr(OPC_META_RAW)->raw.value = $1;}

View file

@ -177,6 +177,8 @@ static const struct test {
/* dEQP-VK.subgroups.quad.graphics.subgroupquadswapdiagonal_int */
INSTR_6XX(b7e03104_00180001, "(sy)quad_shuffle.diag (u32)(x)r1.x, r0.x"), /* (sy)quad_shuffle.diag (u32)(xOOO)r1.x, r0.x */
INSTR_6XX(a7000000_00000000, "tcinv"),
/* cat6 */
INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */
@ -416,6 +418,8 @@ static const struct test {
INSTR_6XX(e0fa0000_00000000, "fence.g.l.r.w"),
INSTR_6XX(e09a0000_00000000, "fence.r.w"),
INSTR_6XX(f0420000_00000000, "(sy)bar.g"),
INSTR_6XX(e2080000_00000000, "dccln.all"),
INSTR_6XX(ffffffff_ffffffff, "raw 0xFFFFFFFFFFFFFFFF"),
/* clang-format on */
};

View file

@ -470,6 +470,21 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="tcinv" extends="#instruction">
<doc>
Texture Cache Invalidate ?
</doc>
<!-- Displayed as the bare mnemonic plus the optional (sy)/(jp) flags;
     no operand fields are declared for this instruction. -->
<display>
{SY}{JP}{NAME}
</display>
<!-- Bits 0..53 carry no named fields for this instruction. -->
<pattern low="0" high="31">xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</pattern>
<pattern low="32" high="53">xxxxxxxxxxxxxxxxxxxxxx</pattern>
<!-- Fixed bits 54..58 that distinguish tcinv from the other cat5 encodings. -->
<pattern low="54" high="58">11100</pattern>
<field name="JP" pos="59" type="bool" display="(jp)"/>
<field name="SY" pos="60" type="bool" display="(sy)"/>
<pattern low="61" high="63">101</pattern> <!-- cat5 -->
</bitset>
<bitset name="brcst.active" extends="#instruction-cat5">
<doc>
The subgroup is divided into (subgroup_size / CLUSTER_SIZE)

View file

@ -25,25 +25,27 @@ SOFTWARE.
<isa>
<!--
Cat7 Instructions: barrier instructions
Cat7 Instructions: barrier, cache, sleep instructions
-->
<bitset name="#instruction-cat7" extends="#instruction">
<pattern low="0" high="31">xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</pattern>
<pattern low="32" high="43">xxxxxxxxxxxx</pattern>
<pattern pos="44" >x</pattern> <!-- blob tells that it is (ss) -->
<field pos="59" name="JP" type="bool" display="(jp)"/>
<field pos="60" name="SY" type="bool" display="(sy)"/>
<pattern low="61" high="63">111</pattern> <!-- cat7 -->
</bitset>
<bitset name="#instruction-cat7-barrier" extends="#instruction-cat7">
<display>
{SY}{JP}{NAME}{G}{L}{R}{W}
</display>
<pattern low="0" high="31">xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</pattern>
<pattern low="32" high="43">xxxxxxxxxxxx</pattern>
<pattern pos="44" >x</pattern> <!-- possibly (ss) ? -->
<pattern low="45" high="50">x1xxxx</pattern>
<field pos="51" name="W" type="bool" display=".w" /> <!-- write -->
<field pos="52" name="R" type="bool" display=".r" /> <!-- read -->
<field pos="53" name="L" type="bool" display=".l" /> <!-- local -->
<field pos="54" name="G" type="bool" display=".g" /> <!-- global -->
<!-- 4b OPC -->
<field pos="59" name="JP" type="bool" display="(jp)"/>
<field pos="60" name="SY" type="bool" display="(sy)"/>
<pattern low="61" high="63">111</pattern> <!-- cat7 -->
<encode>
<map name="W">src->cat7.w</map>
<map name="R">src->cat7.r</map>
@ -52,12 +54,64 @@ SOFTWARE.
</encode>
</bitset>
<bitset name="bar" extends="#instruction-cat7">
<bitset name="bar" extends="#instruction-cat7-barrier">
<pattern low="55" high="58">0000</pattern>
</bitset>
<bitset name="fence" extends="#instruction-cat7">
<bitset name="fence" extends="#instruction-cat7-barrier">
<pattern low="55" high="58">0001</pattern>
</bitset>
<enum name="#dccln-type">
<value val="0" display=".shr"/>
<value val="1" display=".all"/>
</enum>
<bitset name="#instruction-cat7-data" extends="#instruction-cat7">
<display>
{SY}{JP}{NAME}{TYPE}
</display>
<pattern low="45" high="50">xxxxxx</pattern>
<field pos="51" name="TYPE" type="#dccln-type"/>
<pattern low="52" high="54">xxx</pattern>
<encode>
<!-- TODO: read handle type -->
<map name="TYPE">1</map>
</encode>
</bitset>
<bitset name="icinv" extends="#instruction-cat7">
<doc>
Seems to be Instruction Cache Invalidate, supported by the fact
that it considerably slows shader execution compared to
data cache instructions.
</doc>
<display>
{SY}{JP}{NAME}
</display>
<pattern low="45" high="54">xxxxxxxxxx</pattern>
<pattern low="55" high="58">0011</pattern>
</bitset>
<bitset name="dccln" extends="#instruction-cat7-data">
<doc>
Data (Cache?) Clean
</doc>
<pattern low="55" high="58">0100</pattern>
</bitset>
<bitset name="dcinv" extends="#instruction-cat7-data">
<doc>
Data (Cache?) Invalidate
</doc>
<pattern low="55" high="58">0101</pattern>
</bitset>
<bitset name="dcflu" extends="#instruction-cat7-data">
<doc>
Data (Cache?) Flush
</doc>
<pattern low="55" high="58">0110</pattern>
</bitset>
</isa>