freedreno,tu: Update SP_FS_PREFETCH,SP_FS_PREFETCH_CNTL regs definition

Reverse engineer more fields of these regs.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19652>
This commit is contained in:
Danylo Piliaiev 2022-11-10 15:38:28 +01:00 committed by Marge Bot
parent c1a0f4b8fa
commit c7c186590c
10 changed files with 95 additions and 39 deletions

View file

@ -7402,10 +7402,10 @@ clusters:
00000000 SP_FS_MRT[0x6].REG: { COLOR_FORMAT = 0 }
00000000 SP_FS_MRT[0x7].REG: { COLOR_FORMAT = 0 }
00000000 SP_FS_PREFETCH_CNTL: { COUNT = 0 }
03c00000 SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
03c00000 SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
03c00000 SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
03c00000 SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
03c00000 SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
03c00000 SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
03c00000 SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
03c00000 SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
00000000 SP_FS_BINDLESS_PREFETCH[0].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
00000000 SP_FS_BINDLESS_PREFETCH[0x1].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
00000000 SP_FS_BINDLESS_PREFETCH[0x2].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
@ -7484,10 +7484,10 @@ clusters:
00000000 SP_FS_MRT[0x6].REG: { COLOR_FORMAT = 0 }
00000000 SP_FS_MRT[0x7].REG: { COLOR_FORMAT = 0 }
00000000 SP_FS_PREFETCH_CNTL: { COUNT = 0 }
03c00000 SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
03c00000 SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
03c00000 SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
03c00000 SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = 0 }
03c00000 SP_FS_PREFETCH[0].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
03c00000 SP_FS_PREFETCH[0x1].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
03c00000 SP_FS_PREFETCH[0x2].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
03c00000 SP_FS_PREFETCH[0x3].CMD: { SRC = 0 | SAMP_ID = 0 | TEX_ID = 0 | DST = r0.x | WRMASK = 0xf | CMD = TEX_PREFETCH_UNK0 }
00000000 SP_FS_BINDLESS_PREFETCH[0].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
00000000 SP_FS_BINDLESS_PREFETCH[0x1].CMD: { SAMP_ID = 0 | TEX_ID = 0 }
00000000 SP_FS_BINDLESS_PREFETCH[0x2].CMD: { SAMP_ID = 0 | TEX_ID = 0 }

View file

@ -996,7 +996,7 @@ t4 write VPC_VARYING_PS_REPL[0].MODE (9208)
0000000001054320: 0000: 48920808 00000000 00000000 00000000 00000000 00000000 00000000 00000000
*
t4 write SP_FS_PREFETCH_CNTL (a99e)
SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
0000000001054344: 0000: 40a99e01 00007fc0
t4 write HLSQ_CONTROL_1_REG (b982)
HLSQ_CONTROL_1_REG: 0x7
@ -1515,7 +1515,7 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
!+ 000000fc SP_FS_OUTPUT[0x6].REG: { REGID = r63.x }
!+ 000000fc SP_FS_OUTPUT[0x7].REG: { REGID = r63.x }
!+ 00000030 SP_FS_MRT[0].REG: { COLOR_FORMAT = FMT6_8_8_8_8_UNORM }
!+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
!+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
+ 00000000 SP_CS_CONFIG: { NTEX = 0 | NSAMP = 0 | NIBO = 0 }
!+ 00000100 SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
!+ 00000001 SP_FS_INSTRLEN: 1

View file

@ -606,7 +606,7 @@ t4 write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833)
SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
0000000001121000: 0000: 40a83301 00000000
t4 write SP_FS_PREFETCH_CNTL (a99e)
SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
0000000001121008: 0000: 40a99e01 00007fc0
t4 write SP_UNKNOWN_A9A8 (a9a8)
SP_UNKNOWN_A9A8: 0
@ -1116,7 +1116,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
!+ 000000fc SP_FS_OUTPUT[0x6].REG: { REGID = r63.x }
!+ 000000fc SP_FS_OUTPUT[0x7].REG: { REGID = r63.x }
!+ 00000031 SP_FS_MRT[0].REG: { COLOR_FORMAT = FMT6_8_8_8_X8_UNORM }
!+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
!+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
+ 00000000 SP_UNKNOWN_A9A8: 0
!+ 00000005 SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | ISAMMODE = ISAMMODE_GL }
!+ 00000100 SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }
@ -1900,7 +1900,7 @@ t4 write SP_HS_OBJ_FIRST_EXEC_OFFSET (a833)
SP_HS_OBJ_FIRST_EXEC_OFFSET: 0
0000000001120000: 0000: 40a83301 00000000
t4 write SP_FS_PREFETCH_CNTL (a99e)
SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
0000000001120008: 0000: 40a99e01 00007fc0
t4 write SP_UNKNOWN_A9A8 (a9a8)
SP_UNKNOWN_A9A8: 0
@ -6738,7 +6738,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
!+ 00000004 SP_FS_OUTPUT[0x5].REG: { REGID = r1.x }
!+ 00000004 SP_FS_OUTPUT[0x6].REG: { REGID = r1.x }
!+ 00000004 SP_FS_OUTPUT[0x7].REG: { REGID = r1.x }
+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK4 = r63.x | UNK12 = 0x7 }
+ 00007fc0 SP_FS_PREFETCH_CNTL: { COUNT = 0 | UNK6 = 0x1ff }
+ 00000000 SP_UNKNOWN_A9A8: 0
+ 00000005 SP_MODE_CONTROL: { CONSTANT_DEMOTION_ENABLE | ISAMMODE = ISAMMODE_GL }
+ 00000100 SP_FS_CONFIG: { ENABLED | NTEX = 0 | NSAMP = 0 | NIBO = 0 }

View file

@ -4575,18 +4575,18 @@ collect_tex_prefetches(struct ir3_context *ctx, struct ir3 *ir)
&ctx->so->sampler_prefetch[idx];
idx++;
if (instr->flags & IR3_INSTR_B) {
fetch->cmd = IR3_SAMPLER_BINDLESS_PREFETCH_CMD;
fetch->bindless = instr->flags & IR3_INSTR_B;
if (fetch->bindless) {
/* In bindless mode, the index is actually the base */
fetch->tex_id = instr->prefetch.tex_base;
fetch->samp_id = instr->prefetch.samp_base;
fetch->tex_bindless_id = instr->prefetch.tex;
fetch->samp_bindless_id = instr->prefetch.samp;
} else {
fetch->cmd = IR3_SAMPLER_PREFETCH_CMD;
fetch->tex_id = instr->prefetch.tex;
fetch->samp_id = instr->prefetch.samp;
}
fetch->tex_opc = OPC_SAM;
fetch->wrmask = instr->dsts[0]->wrmask;
fetch->dst = instr->dsts[0]->num;
fetch->src = instr->prefetch.input_offset;

View file

@ -770,10 +770,11 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
for (i = 0; i < so->num_sampler_prefetch; i++) {
const struct ir3_sampler_prefetch *fetch = &so->sampler_prefetch[i];
fprintf(out,
"@tex(%sr%d.%c)\tsrc=%u, samp=%u, tex=%u, wrmask=0x%x, cmd=%u\n",
"@tex(%sr%d.%c)\tsrc=%u, samp=%u, tex=%u, wrmask=0x%x, opc=%s\n",
fetch->half_precision ? "h" : "", fetch->dst >> 2,
"xyzw"[fetch->dst & 0x3], fetch -> src, fetch -> samp_id,
fetch -> tex_id, fetch -> wrmask, fetch -> cmd);
fetch -> tex_id, fetch -> wrmask,
disasm_a3xx_instr_name(fetch->tex_opc));
}
const struct ir3_const_state *const_state = ir3_const_state(so);

View file

@ -263,6 +263,7 @@ struct ir3_stream_output_info {
*/
struct ir3_sampler_prefetch {
uint8_t src;
bool bindless;
uint8_t samp_id;
uint8_t tex_id;
uint16_t samp_bindless_id;
@ -270,7 +271,7 @@ struct ir3_sampler_prefetch {
uint8_t dst;
uint8_t wrmask;
uint8_t half_precision;
uint8_t cmd;
opc_t tex_opc;
};
/* Configuration key used to identify a shader variant.. different

View file

@ -3226,12 +3226,24 @@ to upconvert to 32b float internally?
</array>
<reg32 offset="0xa99e" name="SP_FS_PREFETCH_CNTL">
<!-- unknown bits 0x7fc0 always set -->
<bitfield name="COUNT" low="0" high="2" type="uint"/>
<!-- b3 set if no other use of varyings in the shader itself.. maybe alternative to dummy bary.f? -->
<bitfield name="UNK3" pos="3" type="boolean"/>
<bitfield name="UNK4" low="4" high="11" type="a3xx_regid"/>
<bitfield name="UNK12" low="12" high="14"/>
<bitfield name="IJ_WRITE_DISABLE" pos="3" type="boolean"/>
<doc>
Seems to break derivatives when there is a helper invocation
in the quad, though from tests it doesn't seem to be a
"disable helper invocations" flag.
</doc>
<bitfield name="UNK4" pos="4" type="boolean" />
<doc>
Bypass writing to regs and overwrite output with tex color.
TODO: How does it work with multiple prefetches?
</doc>
<bitfield name="WRITE_COLOR_TO_OUTPUT" pos="5" type="boolean"/>
<doc>
Doesn't seem to be a reg: the size doesn't match and it doesn't do
anything observable.
</doc>
<bitfield name="UNK6" low="6" high="14" type="uint"/>
</reg32>
<array offset="0xa99f" name="SP_FS_PREFETCH" stride="1" length="4">
<reg32 offset="0" name="CMD">
@ -3241,14 +3253,10 @@ to upconvert to 32b float internally?
<bitfield name="DST" low="16" high="21" type="a3xx_regid"/>
<bitfield name="WRMASK" low="22" high="25" type="hex"/>
<bitfield name="HALF" pos="26" type="boolean"/>
<!--
CMD seems always 0x4?? 3d, textureProj, textureLod seem to
skip pre-fetch.. TODO test texelFetch
CMD is 0x6 when the Vulkan mode is enabled, and
TEX_ID/SAMP_ID refer to the descriptor sets while the
indices come from SP_FS_BINDLESS_PREFETCH[n]
-->
<bitfield name="CMD" low="27" high="31"/>
<doc>Results in color being zero</doc>
<bitfield name="UNK27" pos="27" type="boolean"/>
<bitfield name="BINDLESS" pos="28" type="boolean"/>
<bitfield name="CMD" low="29" high="31" type="a6xx_tex_prefetch_cmd"/>
</reg32>
</array>
<array offset="0xa9a3" name="SP_FS_BINDLESS_PREFETCH" stride="1" length="4">

View file

@ -376,5 +376,25 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
<value value="0x1" name="RECTANGULAR"/>
</enum>
<doc>
Blob (v615) seems to only use SAM and I wasn't able to coerce
it to produce any other command.
Probably valid for a4xx+ but not enabled or tested on anything
but a6xx.
</doc>
<enum name="a6xx_tex_prefetch_cmd">
<doc> Produces garbage </doc>
<value value="0x0" name="TEX_PREFETCH_UNK0"/>
<value value="0x1" name="TEX_PREFETCH_SAM"/>
<value value="0x2" name="TEX_PREFETCH_GATHER4R"/>
<value value="0x3" name="TEX_PREFETCH_GATHER4G"/>
<value value="0x4" name="TEX_PREFETCH_GATHER4B"/>
<value value="0x5" name="TEX_PREFETCH_GATHER4A"/>
<doc> Causes reads from an invalid address </doc>
<value value="0x6" name="TEX_PREFETCH_UNK6"/>
<doc> Results in color being zero </doc>
<value value="0x7" name="TEX_PREFETCH_UNK7"/>
</enum>
</database>

View file

@ -1468,6 +1468,17 @@ tu6_emit_vpc(struct tu_cs *cs,
tu6_emit_vpc_varying_modes(cs, fs, last_shader);
}
/* Translate an ir3 texture opcode into the a6xx_tex_prefetch_cmd value
 * that is programmed into the CMD field of an SP_FS_PREFETCH entry.
 *
 * Only OPC_SAM is mapped (to TEX_PREFETCH_SAM); any other opcode falls
 * through to unreachable().
 */
static enum a6xx_tex_prefetch_cmd
tu6_tex_opc_to_prefetch_cmd(opc_t tex_opc)
{
   if (tex_opc == OPC_SAM)
      return TEX_PREFETCH_SAM;

   unreachable("Unknown tex opc for prefeth cmd");
}
void
tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
{
@ -1494,8 +1505,8 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) |
A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) |
0x7000); // XXX);
COND(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]),
A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE));
for (int i = 0; i < fs->num_sampler_prefetch; i++) {
const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) |
@ -1504,7 +1515,9 @@ tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) |
A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) |
COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) |
A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd));
COND(prefetch->bindless, A6XX_SP_FS_PREFETCH_CMD_BINDLESS) |
A6XX_SP_FS_PREFETCH_CMD_CMD(
tu6_tex_opc_to_prefetch_cmd(prefetch->tex_opc)));
}
if (fs->num_sampler_prefetch > 0) {

View file

@ -388,6 +388,17 @@ fd6_emit_tess_bos(struct fd_screen *screen, struct fd_ringbuffer *ring,
OUT_RELOC(ring, screen->tess_bo, 0, 0, 0);
}
/* Map the texture opcode recorded for a sampler prefetch onto the
 * matching a6xx_tex_prefetch_cmd enumerant, for use as the CMD field of
 * SP_FS_PREFETCH.
 *
 * OPC_SAM is the only opcode with a mapping here; everything else ends
 * up in unreachable().
 */
static enum a6xx_tex_prefetch_cmd
tex_opc_to_prefetch_cmd(opc_t tex_opc)
{
   if (tex_opc == OPC_SAM)
      return TEX_PREFETCH_SAM;

   unreachable("Unknown tex opc for prefeth cmd");
}
static void
setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
struct fd6_program_state *state,
@ -539,8 +550,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
OUT_PKT4(ring, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
OUT_RING(ring, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) |
A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) |
0x7000); // XXX
COND(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]),
A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE));
for (int i = 0; i < fs->num_sampler_prefetch; i++) {
const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
OUT_RING(ring,
@ -550,7 +561,9 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) |
A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) |
COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) |
A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd));
COND(prefetch->bindless, A6XX_SP_FS_PREFETCH_CMD_BINDLESS) |
A6XX_SP_FS_PREFETCH_CMD_CMD(
tex_opc_to_prefetch_cmd(prefetch->tex_opc)));
}
OUT_PKT4(ring, REG_A6XX_SP_UNKNOWN_A9A8, 1);