freedreno/ir3: add meta instruction for pre-fs texture fetch

Add a placeholder instruction to track texture fetches made prior to FS
shader dispatch.  These, like meta:input instructions, are scheduled
before any real instructions, so that RA realizes their result values
are live before the first real instruction.  This also gives legalize a
way to track uses of the fetched sample that require (sy) sync flags.

There is some related special handling for varying texcoord inputs used
for pre-fs-fetch, so that they are not DCE'd and remain in the linkage
between the FS and the previous stage.  Note that we could almost avoid
this special handling by giving meta:tex_prefetch real src arguments,
except that in the FS stage, inputs are actual bary.f/ldlv instructions.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
This commit is contained in:
Rob Clark 2019-10-11 15:57:22 -07:00 committed by Rob Clark
parent 11e467c378
commit 482e1b9955
6 changed files with 33 additions and 3 deletions

View file

@ -212,6 +212,11 @@ typedef enum {
OPC_META_FO = _OPC(-1, 2),
OPC_META_FI = _OPC(-1, 3),
/* placeholder for texture fetches that run before FS invocation
* starts:
*/
OPC_META_TEX_PREFETCH = _OPC(-1, 4),
} opc_t;
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))

View file

@ -267,6 +267,10 @@ struct ir3_instruction {
struct {
int off; /* component/offset */
} fo;
struct {
unsigned samp, tex;
unsigned input_offset;
} prefetch;
struct {
/* for sysvals, identifies the sysval type. Mostly so we can
* identify the special cases where a sysval should not be DCE'd
@ -1465,6 +1469,9 @@ INSTR4F(G, ATOMIC_XOR)
INSTR0(BAR)
INSTR0(FENCE)
/* meta instructions: */
INSTR0(META_TEX_PREFETCH);
/* ************************************************************************* */
/* split this out or find some helper to use.. like main/bitset.h.. */

View file

@ -2635,6 +2635,16 @@ pack_inlocs(struct ir3_context *ctx)
compile_assert(ctx, i < so->inputs_count);
used_components[i] |= 1 << j;
} else if (instr->opc == OPC_META_TEX_PREFETCH) {
for (int n = 0; n < 2; n++) {
unsigned inloc = instr->prefetch.input_offset + n;
unsigned i = inloc / 4;
unsigned j = inloc % 4;
compile_assert(ctx, i < so->inputs_count);
used_components[i] |= 1 << j;
}
}
}
}

View file

@ -118,7 +118,10 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
n->flags &= ~(IR3_INSTR_SS | IR3_INSTR_SY);
if (is_meta(n))
/* _meta:tex_prefetch instructions are removed later in
* collect_tex_prefetches()
*/
if (is_meta(n) && (n->opc != OPC_META_TEX_PREFETCH))
continue;
if (is_input(n)) {
@ -237,7 +240,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
if (is_sfu(n))
regmask_set(&state->needs_ss, n->regs[0]);
if (is_tex(n)) {
if (is_tex(n) || (n->opc == OPC_META_TEX_PREFETCH)) {
regmask_set(&state->needs_sy, n->regs[0]);
ctx->need_pixlod = true;
} else if (n->opc == OPC_RESINFO) {

View file

@ -53,6 +53,7 @@ static void print_instr_name(struct ir3_instruction *instr)
case OPC_META_INPUT: printf("_meta:in"); break;
case OPC_META_FO: printf("_meta:fo"); break;
case OPC_META_FI: printf("_meta:fi"); break;
case OPC_META_TEX_PREFETCH: printf("_meta:tex_prefetch"); break;
/* shouldn't hit here.. just for debugging: */
default: printf("_meta:%d", instr->opc); break;
@ -181,6 +182,9 @@ print_instr(struct ir3_instruction *instr, int lvl)
if (instr->opc == OPC_META_FO) {
printf(", off=%d", instr->fo.off);
} else if (instr->opc == OPC_META_TEX_PREFETCH) {
printf(", tex=%d, samp=%d, input_offset=%d", instr->prefetch.tex,
instr->prefetch.samp, instr->prefetch.input_offset);
}
if (is_flow(instr) && instr->cat0.target) {

View file

@ -788,7 +788,8 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
* occupied), and move remaining to depth sorted list:
*/
list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
if (instr->opc == OPC_META_INPUT) {
if ((instr->opc == OPC_META_INPUT) ||
(instr->opc == OPC_META_TEX_PREFETCH)) {
schedule(ctx, instr);
} else {
ir3_insert_by_depth(instr, &ctx->depth_list);