freedreno/ir3: Fix assertion failures dumping CS high full regs.

The *2 here would stack on top of the *2 already done in regmask_set(),
causing assertion failures when dumping CS programs.  Just set the
mergedregs flag on a6xx, and don't duplicate the mergedregs logic.  If
you're dealing with new HW where we don't know whether mergedregs is set,
you may need to tweak the flag during disasm setup for the stats to make
sense.

Fixes: f7bd3456d7 ("freedreno: deduplicate a3xx+ disasm")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6323>
(cherry picked from commit a27823ef2c)
This commit is contained in:
Eric Anholt 2020-08-14 11:52:57 -07:00 committed by Dylan Baker
parent fc698d8ce7
commit 99540aa4fa
5 changed files with 1058 additions and 69 deletions

View file

@ -1093,7 +1093,7 @@
"description": "freedreno/ir3: Fix assertion failures dumping CS high full regs.",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"master_sha": null,
"because_sha": "f7bd3456d79aaeccb5f5e8d1408e85ad198f4f38"
},

File diff suppressed because it is too large Load diff

View file

@ -812,14 +812,13 @@ t4 write SP_VS_OBJ_START_LO (a81c)
:0:0008:0011[00000000x_00000000x] nop
:0:0009:0012[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 8-23 (cnt=16, max=23)
- used (full): 4-11 (cnt=8, max=11)
- used (merged): 8-23 (cnt=16, max=23)
- input (half): (cnt=0, max=0)
- input (half): 8-19 (cnt=12, max=19)
- input (full): 4-9 (cnt=6, max=9)
- max const: 5
- output (half): (cnt=0, max=0) (estimated)
- output (half): 16-23 (cnt=8, max=23) (estimated)
- output (full): 8-11 (cnt=4, max=11) (estimated)
- shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 half, 3 full
- shaderdb: 0 (ss), 0 (sy)
@ -839,14 +838,13 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
:0:0008:0011[00000000x_00000000x] nop
:0:0009:0012[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 8-23 (cnt=16, max=23)
- used (full): 4-11 (cnt=8, max=11)
- used (merged): 8-23 (cnt=16, max=23)
- input (half): (cnt=0, max=0)
- input (half): 8-19 (cnt=12, max=19)
- input (full): 4-9 (cnt=6, max=9)
- max const: 5
- output (half): (cnt=0, max=0) (estimated)
- output (half): 16-23 (cnt=8, max=23) (estimated)
- output (full): 8-11 (cnt=4, max=11) (estimated)
- shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 half, 3 full
- shaderdb: 0 (ss), 0 (sy)
@ -904,14 +902,13 @@ t4 write SP_FS_OBJ_START_LO (a983)
:0:0007:0007[00000000x_00000000x] nop
:0:0008:0008[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-1 4-11 (cnt=10, max=11)
- used (full): 0 2-5 (cnt=5, max=5)
- used (merged): 0-1 4-11 (cnt=10, max=11)
- input (half): (cnt=0, max=0)
- input (half): 0-1 (cnt=2, max=1)
- input (full): 0 (cnt=1, max=0)
- max const: 0
- output (half): (cnt=0, max=0) (estimated)
- output (half): 4-11 (cnt=8, max=11) (estimated)
- output (full): 2-5 (cnt=4, max=5) (estimated)
- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full
- shaderdb: 0 (ss), 0 (sy)
@ -930,14 +927,13 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
:0:0007:0007[00000000x_00000000x] nop
:0:0008:0008[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-1 4-11 (cnt=10, max=11)
- used (full): 0 2-5 (cnt=5, max=5)
- used (merged): 0-1 4-11 (cnt=10, max=11)
- input (half): (cnt=0, max=0)
- input (half): 0-1 (cnt=2, max=1)
- input (full): 0 (cnt=1, max=0)
- max const: 0
- output (half): (cnt=0, max=0) (estimated)
- output (half): 4-11 (cnt=8, max=11) (estimated)
- output (full): 2-5 (cnt=4, max=5) (estimated)
- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full
- shaderdb: 0 (ss), 0 (sy)
@ -1498,14 +1494,13 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
:0:0008:0011[00000000x_00000000x] nop
:0:0009:0012[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 8-23 (cnt=16, max=23)
- used (full): 4-11 (cnt=8, max=11)
- used (merged): 8-23 (cnt=16, max=23)
- input (half): (cnt=0, max=0)
- input (half): 8-19 (cnt=12, max=19)
- input (full): 4-9 (cnt=6, max=9)
- max const: 5
- output (half): (cnt=0, max=0) (estimated)
- output (half): 16-23 (cnt=8, max=23) (estimated)
- output (full): 8-11 (cnt=4, max=11) (estimated)
- shaderdb: 13 instructions, 7 nops, 6 non-nops, (10 instlen), 0 half, 3 full
- shaderdb: 0 (ss), 0 (sy)
@ -1533,14 +1528,13 @@ t7 opcode: CP_DRAW_INDIRECT_MULTI (2a) (12 dwords)
:0:0007:0007[00000000x_00000000x] nop
:0:0008:0008[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-1 4-11 (cnt=10, max=11)
- used (full): 0 2-5 (cnt=5, max=5)
- used (merged): 0-1 4-11 (cnt=10, max=11)
- input (half): (cnt=0, max=0)
- input (half): 0-1 (cnt=2, max=1)
- input (full): 0 (cnt=1, max=0)
- max const: 0
- output (half): (cnt=0, max=0) (estimated)
- output (half): 4-11 (cnt=8, max=11) (estimated)
- output (full): 2-5 (cnt=4, max=5) (estimated)
- shaderdb: 9 instructions, 4 nops, 5 non-nops, (9 instlen), 0 half, 2 full
- shaderdb: 0 (ss), 0 (sy)

View file

@ -637,7 +637,6 @@ t4 write SP_VS_OBJ_START_LO (a81c)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -659,7 +658,6 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -1107,7 +1105,6 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -1952,7 +1949,6 @@ t4 write SP_VS_OBJ_START_LO (a81c)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -1974,7 +1970,6 @@ t7 opcode: CP_LOAD_STATE6_GEOM (32) (4 dwords)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -3493,14 +3488,13 @@ t4 write SP_FS_OBJ_START_LO (a983)
:0:1404:2412[00000000x_00000000x] nop
:0:1405:2413[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-147 (cnt=148, max=147)
- used (full): 0-73 (cnt=74, max=73)
- used (merged): 0-147 (cnt=148, max=147)
- input (half): (cnt=0, max=0)
- input (half): 38-41 (cnt=4, max=41)
- input (full): 19-20 (cnt=2, max=20)
- max const: 113
- output (half): (cnt=0, max=0) (estimated)
- output (half): 8-15 (cnt=8, max=15) (estimated)
- output (full): 4-7 (cnt=4, max=7) (estimated)
- shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full
- shaderdb: 140 (ss), 0 (sy)
@ -4916,14 +4910,13 @@ t7 opcode: CP_LOAD_STATE6_FRAG (34) (4 dwords)
:0:1404:2412[00000000x_00000000x] nop
:0:1405:2413[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-147 (cnt=148, max=147)
- used (full): 0-73 (cnt=74, max=73)
- used (merged): 0-147 (cnt=148, max=147)
- input (half): (cnt=0, max=0)
- input (half): 38-41 (cnt=4, max=41)
- input (full): 19-20 (cnt=2, max=20)
- max const: 113
- output (half): (cnt=0, max=0) (estimated)
- output (half): 8-15 (cnt=8, max=15) (estimated)
- output (full): 4-7 (cnt=4, max=7) (estimated)
- shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full
- shaderdb: 140 (ss), 0 (sy)
@ -5332,7 +5325,6 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
Register Stats:
- used (half): (cnt=0, max=0)
- used (full): (cnt=0, max=0)
- used (merged): (cnt=0, max=0)
- input (half): (cnt=0, max=0)
- input (full): (cnt=0, max=0)
- max const: 0
@ -6768,14 +6760,13 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
:0:1404:2412[00000000x_00000000x] nop
:0:1405:2413[00000000x_00000000x] nop
Register Stats:
- used (half): (cnt=0, max=0)
- used (half): 0-147 (cnt=148, max=147)
- used (full): 0-73 (cnt=74, max=73)
- used (merged): 0-147 (cnt=148, max=147)
- input (half): (cnt=0, max=0)
- input (half): 38-41 (cnt=4, max=41)
- input (full): 19-20 (cnt=2, max=20)
- max const: 113
- output (half): (cnt=0, max=0) (estimated)
- output (half): 8-15 (cnt=8, max=15) (estimated)
- output (full): 4-7 (cnt=4, max=7) (estimated)
- shaderdb: 2414 instructions, 1355 nops, 1059 non-nops, (1406 instlen), 0 half, 19 full
- shaderdb: 140 (ss), 0 (sy)

View file

@ -92,7 +92,6 @@ struct disasm_ctx {
/* tracking for register usage */
struct {
regmask_t used;
regmask_t used_merged;
regmask_t rbw; /* read before write */
regmask_t war; /* write after read */
unsigned max_const;
@ -248,11 +247,6 @@ static void print_reg_stats(struct disasm_ctx *ctx)
fprintf(ctx->out, "%s- used (full):", levels[ctx->level]);
fullreg = print_regs(ctx, &ctx->regs.used, true);
fprintf(ctx->out, "\n");
if (ctx->gpu_id >= 600) {
fprintf(ctx->out, "%s- used (merged):", levels[ctx->level]);
print_regs(ctx, &ctx->regs.used_merged, false);
fprintf(ctx->out, "\n");
}
fprintf(ctx->out, "%s- input (half):", levels[ctx->level]);
print_regs(ctx, &ctx->regs.rbw, false);
fprintf(ctx->out, "\n");
@ -299,15 +293,6 @@ static void process_reg_dst(struct disasm_ctx *ctx)
regmask_set(&ctx->regs.war, dst, ctx->last_dst_full);
regmask_set(&ctx->regs.used, dst, ctx->last_dst_full);
if (ctx->gpu_id >= 600) {
if (ctx->last_dst_full) {
regmask_set(&ctx->regs.used_merged, (dst*2)+0, false);
regmask_set(&ctx->regs.used_merged, (dst*2)+1, false);
} else {
regmask_set(&ctx->regs.used_merged, dst, false);
}
}
}
ctx->last_dst_valid = false;
@ -357,13 +342,6 @@ static void print_src(struct disasm_ctx *ctx, struct reginfo *info)
regmask_clear(&ctx->regs.war, src, info->full);
regmask_set(&ctx->regs.used, src, info->full);
if (info->full) {
regmask_set(&ctx->regs.used_merged, (src*2)+0, false);
regmask_set(&ctx->regs.used_merged, (src*2)+1, false);
} else {
regmask_set(&ctx->regs.used_merged, src, false);
}
if (!info->r)
break;
}
@ -1628,6 +1606,11 @@ int disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
ctx.level = level;
ctx.gpu_id = gpu_id;
ctx.stats = stats;
if (gpu_id >= 600) {
ctx.regs.used.mergedregs = true;
ctx.regs.rbw.mergedregs = true;
ctx.regs.war.mergedregs = true;
}
memset(ctx.stats, 0, sizeof(*ctx.stats));
for (i = 0; i < sizedwords; i += 2) {