freedreno/ir3: Fix assertion failures dumping CS high full regs.

The explicit *2 used here to build the used_merged mask for full registers
would bump into the *2 in regset, causing assertion failures when dumping CS
programs that use high full regs.  Just set the mergedregs flag on the
regmasks on a6xx, and don't duplicate the mergedregs logic.  If you're
dealing with new HW where we don't know if mergedregs is set, you may need
to tweak the flag during disasm setup for the stats to make sense.

Fixes: f7bd3456d7 ("freedreno: deduplicate a3xx+ disasm")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6323>
This commit is contained in:
Eric Anholt 2020-08-14 11:52:57 -07:00 committed by Marge Bot
parent ce335dcb19
commit a27823ef2c
4 changed files with 1057 additions and 68 deletions

File diff suppressed because it is too large Load diff

View file

@ -812,14 +812,13 @@ t4 write SP_VS_OBJ_START_LO (a81c)
:0:0008:0011[00000000x_00000000x] nop
:0:0009:0012[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 8-23 (cnt=16, max=23)
- used (full): 4-11 (cnt=8, max=11)
- used (merged): 8-23 (cnt=16, max=23)
- input (half): (cnt=0, max=0)
- input (half): 8-19 (cnt=12, max=19)
- input (full): 4-9 (cnt=6, max=9)
- max const: 5
- output (half): (cnt=0, max=0) (estimated)
- output (half): 16-23 (cnt=8, max=23) (estimated)
- output (full): 8-11 (cnt=4, max=11) (estimated)
- shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
- shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
@ -840,14 +839,13 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
:0:0008:0011[00000000x_00000000x] nop
:0:0009:0012[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 8-23 (cnt=16, max=23)
- used (full): 4-11 (cnt=8, max=11)
- used (merged): 8-23 (cnt=16, max=23)
- input (half): (cnt=0, max=0)
- input (half): 8-19 (cnt=12, max=19)
- input (full): 4-9 (cnt=6, max=9)
- max const: 5
- output (half): (cnt=0, max=0) (estimated)
- output (half): 16-23 (cnt=8, max=23) (estimated)
- output (full): 8-11 (cnt=4, max=11) (estimated)
- shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
- shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
@ -906,14 +904,13 @@ t4 write SP_FS_OBJ_START_LO (a983)
:0:0007:0007[00000000x_00000000x] nop
:0:0008:0008[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-1 4-11 (cnt=10, max=11)
- used (full): 0 2-5 (cnt=5, max=5)
- used (merged): 0-1 4-11 (cnt=10, max=11)
- input (half): (cnt=0, max=0)
- input (half): 0-1 (cnt=2, max=1)
- input (full): 0 (cnt=1, max=0)
- max const: 0
- output (half): (cnt=0, max=0) (estimated)
- output (half): 4-11 (cnt=8, max=11) (estimated)
- output (full): 2-5 (cnt=4, max=5) (estimated)
- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
- shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
@ -933,14 +930,13 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
:0:0007:0007[00000000x_00000000x] nop
:0:0008:0008[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-1 4-11 (cnt=10, max=11)
- used (full): 0 2-5 (cnt=5, max=5)
- used (merged): 0-1 4-11 (cnt=10, max=11)
- input (half): (cnt=0, max=0)
- input (half): 0-1 (cnt=2, max=1)
- input (full): 0 (cnt=1, max=0)
- max const: 0
- output (half): (cnt=0, max=0) (estimated)
- output (half): 4-11 (cnt=8, max=11) (estimated)
- output (full): 2-5 (cnt=4, max=5) (estimated)
- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
- shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
@ -1502,14 +1498,13 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
:0:0008:0011[00000000x_00000000x] nop
:0:0009:0012[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 8-23 (cnt=16, max=23)
- used (full): 4-11 (cnt=8, max=11)
- used (merged): 8-23 (cnt=16, max=23)
- input (half): (cnt=0, max=0)
- input (half): 8-19 (cnt=12, max=19)
- input (full): 4-9 (cnt=6, max=9)
- max const: 5
- output (half): (cnt=0, max=0) (estimated)
- output (half): 16-23 (cnt=8, max=23) (estimated)
- output (full): 8-11 (cnt=4, max=11) (estimated)
- shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 last-baryf, 0 half, 3 full
- shaderdb: 8 cat0, 0 cat1, 1 cat2, 4 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7
@ -1538,14 +1533,13 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
:0:0007:0007[00000000x_00000000x] nop
:0:0008:0008[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-1 4-11 (cnt=10, max=11)
- used (full): 0 2-5 (cnt=5, max=5)
- used (merged): 0-1 4-11 (cnt=10, max=11)
- input (half): (cnt=0, max=0)
- input (half): 0-1 (cnt=2, max=1)
- input (full): 0 (cnt=1, max=0)
- max const: 0
- output (half): (cnt=0, max=0) (estimated)
- output (half): 4-11 (cnt=8, max=11) (estimated)
- output (full): 2-5 (cnt=4, max=5) (estimated)
- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 3 last-baryf, 0 half, 2 full
- shaderdb: 5 cat0, 0 cat1, 4 cat2, 0 cat3, 0 cat4, 0 cat5, 0 cat6, 0 cat7

View file

@ -637,7 +637,6 @@ t4 write SP_VS_OBJ_START_LO (a81c)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -660,7 +659,6 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -1109,7 +1107,6 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -1955,7 +1952,6 @@ t4 write SP_VS_OBJ_START_LO (a81c)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -1978,7 +1974,6 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -3498,14 +3493,13 @@ t4 write SP_FS_OBJ_START_LO (a983)
:0:1404:2412[00000000x_00000000x] nop
:0:1405:2413[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-147 (cnt=148, max=147)
- used (full): 0-73 (cnt=74, max=73)
- used (merged): 0-147 (cnt=148, max=147)
- input (half): (cnt=0, max=0)
- input (half): 38-41 (cnt=4, max=41)
- input (full): 19-20 (cnt=2, max=20)
- max const: 113
- output (half): (cnt=0, max=0) (estimated)
- output (half): 8-15 (cnt=8, max=15) (estimated)
- output (full): 4-7 (cnt=4, max=7) (estimated)
- shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
- shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
@ -4922,14 +4916,13 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
:0:1404:2412[00000000x_00000000x] nop
:0:1405:2413[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-147 (cnt=148, max=147)
- used (full): 0-73 (cnt=74, max=73)
- used (merged): 0-147 (cnt=148, max=147)
- input (half): (cnt=0, max=0)
- input (half): 38-41 (cnt=4, max=41)
- input (full): 19-20 (cnt=2, max=20)
- max const: 113
- output (half): (cnt=0, max=0) (estimated)
- output (half): 8-15 (cnt=8, max=15) (estimated)
- output (full): 4-7 (cnt=4, max=7) (estimated)
- shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
- shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7
@ -5339,7 +5332,6 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -6776,14 +6768,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
:0:1404:2412[00000000x_00000000x] nop
:0:1405:2413[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-147 (cnt=148, max=147)
- used (full): 0-73 (cnt=74, max=73)
- used (merged): 0-147 (cnt=148, max=147)
- input (half): (cnt=0, max=0)
- input (half): 38-41 (cnt=4, max=41)
- input (full): 19-20 (cnt=2, max=20)
- max const: 113
- output (half): (cnt=0, max=0) (estimated)
- output (half): 8-15 (cnt=8, max=15) (estimated)
- output (full): 4-7 (cnt=4, max=7) (estimated)
- shaderdb: 2414 instructions, 1114 nops, 1300 non-nops, (1406 instlen), 0 last-baryf, 0 half, 19 full
- shaderdb: 1120 cat0, 48 cat1, 551 cat2, 512 cat3, 183 cat4, 0 cat5, 0 cat6, 0 cat7

View file

@ -92,7 +92,6 @@ struct disasm_ctx {
/* tracking for register usage */
struct {
regmask_t used;
regmask_t used_merged;
regmask_t rbw; /* read before write */
regmask_t war; /* write after read */
unsigned max_const;
@ -248,11 +247,6 @@ static void print_reg_stats(struct disasm_ctx *ctx)
fprintf(ctx->out, "%s- used (full):", levels[ctx->level]);
fullreg = print_regs(ctx, &ctx->regs.used, true);
fprintf(ctx->out, "\n");
if (ctx->gpu_id >= 600) {
fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]);
print_regs(ctx, &ctx->regs.used_merged, false);
fprintf(ctx->out, "\n");
}
fprintf(ctx->out, "%s- input (half):", levels[ctx->level]);
print_regs(ctx, &ctx->regs.rbw, false);
fprintf(ctx->out, "\n");
@ -310,15 +304,6 @@ static void process_reg_dst(struct disasm_ctx *ctx)
regmask_set(&ctx->regs.war, dst, ctx->last_dst_full);
regmask_set(&ctx->regs.used, dst, ctx->last_dst_full);
if (ctx->gpu_id >= 600) {
if (ctx->last_dst_full) {
regmask_set(&ctx->regs.used_merged, (dst*2)+0, false);
regmask_set(&ctx->regs.used_merged, (dst*2)+1, false);
} else {
regmask_set(&ctx->regs.used_merged, dst, false);
}
}
}
ctx->last_dst_valid = false;
@ -368,13 +353,6 @@ static void print_src(struct disasm_ctx *ctx, struct reginfo *info)
regmask_clear(&ctx->regs.war, src, info->full);
regmask_set(&ctx->regs.used, src, info->full);
if (info->full) {
regmask_set(&ctx->regs.used_merged, (src*2)+0, false);
regmask_set(&ctx->regs.used_merged, (src*2)+1, false);
} else {
regmask_set(&ctx->regs.used_merged, src, false);
}
if (!info->r)
break;
}
@ -1650,6 +1628,11 @@ int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
ctx.level = level;
ctx.gpu_id = gpu_id;
ctx.stats = stats;
if (gpu_id >= 600) {
ctx.regs.used.mergedregs = true;
ctx.regs.rbw.mergedregs = true;
ctx.regs.war.mergedregs = true;
}
memset(ctx.stats, 0, sizeof(*ctx.stats));
for (i = 0; i < sizedwords; i += 2) {