freedreno/registers: pm4 cleanup

Use <stripe> elements to handle the differences between GPUs with 32-bit addresses (A2XX-A4XX) and GPUs with 64-bit addresses (A5XX and later).

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37009>
This commit is contained in:
Rob Clark 2025-08-06 08:27:41 -07:00 committed by Marge Bot
parent 065d2547e7
commit f31883c20c
6 changed files with 34 additions and 22 deletions

View file

@ -626,7 +626,7 @@ a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3],
fd_pkt7(cs, CP_REG_TO_MEM, 3)
.add(CP_REG_TO_MEM_0(.reg = counter->counter_reg_lo, ._64b = true))
.add(CP_REG_TO_MEM_DEST(query_sample_idx(a6xx_backend, i, start)));
.add(A5XX_CP_REG_TO_MEM_DEST(query_sample_idx(a6xx_backend, i, start)));
}
}
@ -645,7 +645,7 @@ a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3],
fd_pkt7(cs, CP_REG_TO_MEM, 3)
.add(CP_REG_TO_MEM_0(.reg = counter->counter_reg_lo, ._64b = true))
.add(CP_REG_TO_MEM_DEST(query_sample_idx(a6xx_backend, i, stop)));
.add(A5XX_CP_REG_TO_MEM_DEST(query_sample_idx(a6xx_backend, i, stop)));
}
/* and compute the result: */

View file

@ -1264,7 +1264,7 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</reg32>
</domain>
<domain name="CP_REG_TO_MEM" width="32">
<domain name="CP_REG_TO_MEM" width="32" prefix="chip">
<reg32 offset="0" name="0">
<bitfield name="REG" low="0" high="17" type="hex"/>
<!-- number of registers/dwords copied is max(CNT, 1). -->
@ -1272,8 +1272,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<bitfield name="64B" pos="30" type="boolean"/>
<bitfield name="ACCUMULATE" pos="31" type="boolean"/>
</reg32>
<reg32 offset="1" name="DEST32" type="waddress" varset="chip" variants="A2XX-A4XX"/>
<reg64 offset="1" name="DEST" type="waddress" varset="chip" variants="A5XX-"/>
<stripe varset="chip" variants="A2XX-A4XX">
<reg32 offset="1" name="DEST" type="address"/>
</stripe>
<stripe varset="chip" variants="A5XX-">
<reg64 offset="1" name="DEST" type="address"/>
</stripe>
</domain>
<domain name="CP_REG_TO_MEM_OFFSET_REG" width="32">
@ -1323,8 +1327,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
<!-- does the same thing as CP_MEM_TO_MEM::UNK31 -->
<bitfield name="UNK31" pos="31" type="boolean"/>
</reg32>
<reg32 offset="1" name="SRC32" type="address" varset="chip" variants="A2XX-A4XX"/>
<reg64 offset="1" name="SRC" type="address" varset="chip" variants="A5XX-"/>
<stripe varset="chip" variants="A2XX-A4XX">
<reg32 offset="1" name="SRC" type="address"/>
</stripe>
<stripe varset="chip" variants="A5XX-">
<reg64 offset="1" name="SRC" type="address"/>
</stripe>
</domain>
<domain name="CP_MEM_TO_MEM" width="32">
@ -1406,8 +1414,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</domain>
<domain name="CP_MEM_WRITE" width="32">
<reg32 offset="0" name="ADDR32" varset="chip" variants="A2XX-A4XX" type="waddress"/>
<reg64 offset="0" name="ADDR" varset="chip" variants="A5XX-" type="waddress"/>
<stripe varset="chip" variants="A2XX-A4XX">
<reg32 offset="0" name="ADDR" type="address"/>
</stripe>
<stripe varset="chip" variants="A5XX-">
<reg64 offset="0" name="ADDR" type="address"/>
</stripe>
<!-- followed by the DWORDs to write -->
</domain>

View file

@ -3763,7 +3763,7 @@ NEEDS WFI: CP_SCRATCH_REG7 (57f)
PA_SC_WINDOW_OFFSET: { X = 0 | Y = 0 }
0110a268: 0000: c0012d00 00040080 00000000
opcode: CP_MEM_WRITE (3d) (3 dwords)
{ ADDR32 = 0x100903c }
{ ADDR = 0x100903c }
gpuaddr:0100903c
0110a27c: 0.000000
0110a274: 0000: c0013d00 0100903c 00800080
@ -4458,7 +4458,7 @@ NEEDS WFI: CP_SCRATCH_REG6 (57e)
PA_SC_WINDOW_OFFSET: { X = -128 | Y = 0 }
0110a32c: 0000: c0012d00 00040080 00007f80
opcode: CP_MEM_WRITE (3d) (3 dwords)
{ ADDR32 = 0x100903c }
{ ADDR = 0x100903c }
gpuaddr:0100903c
0110a340: 0.000000
0110a338: 0000: c0013d00 0100903c 00800080
@ -4520,7 +4520,7 @@ NEEDS WFI: CP_SCRATCH_REG6 (57e)
PA_SC_WINDOW_OFFSET: { X = 0 | Y = -128 }
0110a3f0: 0000: c0012d00 00040080 7f800000
opcode: CP_MEM_WRITE (3d) (3 dwords)
{ ADDR32 = 0x100903c }
{ ADDR = 0x100903c }
gpuaddr:0100903c
0110a404: 0.000000
0110a3fc: 0000: c0013d00 0100903c 00800080
@ -4582,7 +4582,7 @@ NEEDS WFI: CP_SCRATCH_REG6 (57e)
PA_SC_WINDOW_OFFSET: { X = -128 | Y = -128 }
0110a4b4: 0000: c0012d00 00040080 7f807f80
opcode: CP_MEM_WRITE (3d) (3 dwords)
{ ADDR32 = 0x100903c }
{ ADDR = 0x100903c }
gpuaddr:0100903c
0110a4c8: 0.000000
0110a4c0: 0000: c0013d00 0100903c 00800080

View file

@ -407,7 +407,7 @@ fd6_emit_streamout(fd_cs &cs, struct fd6_emit *emit) assert_dt
assert(so->offsets[i] == 0);
fd_pkt7(cs, CP_MEM_WRITE, 3)
.add(CP_MEM_WRITE_ADDR(offset_bo))
.add(A5XX_CP_MEM_WRITE_ADDR(offset_bo))
.add(target->base.buffer_offset);
fd_pkt4(cs, 1)
@ -419,7 +419,7 @@ fd6_emit_streamout(fd_cs &cs, struct fd6_emit *emit) assert_dt
.shift_by_2 = CHIP == A6XX,
.unk31 = true,
))
.add(CP_MEM_TO_REG_SRC(offset_bo));
.add(A5XX_CP_MEM_TO_REG_SRC(offset_bo));
}
// After a draw HW would write the new offset to offset_bo

View file

@ -1091,7 +1091,7 @@ fd6_build_preemption_preamble(struct fd_context *ctx)
.reg = REG_A6XX_VSC_CHANNEL_VISIBILITY(0),
.cnt = 32,
))
.add(CP_MEM_TO_REG_SRC(
.add(A5XX_CP_MEM_TO_REG_SRC(
control_ptr(fd6_context(ctx), vsc_state),
));
@ -1202,7 +1202,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
.reg = REG_A6XX_VSC_CHANNEL_VISIBILITY(0),
.cnt = 32,
))
.add(CP_REG_TO_MEM_DEST(
.add(A5XX_CP_REG_TO_MEM_DEST(
control_ptr(fd6_context(batch->ctx), vsc_state)
));
} else {

View file

@ -111,7 +111,7 @@ occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
fd_pkt7(cs, CP_MEM_WRITE, 4)
.add(CP_MEM_WRITE_ADDR(query_sample(aq, stop)))
.add(A5XX_CP_MEM_WRITE_ADDR(query_sample(aq, stop)))
.add(0xffffffff)
.add(0xffffffff);
@ -505,7 +505,7 @@ pipeline_stats_resume(struct fd_acc_query *aq, struct fd_batch *batch)
/* snapshot the start value: */
fd_pkt7(cs, CP_REG_TO_MEM, 3)
.add(CP_REG_TO_MEM_0(.reg = reg, .cnt = 2, ._64b = true))
.add(CP_REG_TO_MEM_DEST(stats_sample(aq, start)));
.add(A5XX_CP_REG_TO_MEM_DEST(stats_sample(aq, start)));
assert(type < ARRAY_SIZE(batch->pipeline_stats_queries_active));
@ -529,7 +529,7 @@ pipeline_stats_pause(struct fd_acc_query *aq, struct fd_batch *batch)
/* snapshot the end values: */
fd_pkt7(cs, CP_REG_TO_MEM, 3)
.add(CP_REG_TO_MEM_0(.reg = reg, .cnt = 2, ._64b = true))
.add(CP_REG_TO_MEM_DEST(stats_sample(aq, stop)));
.add(A5XX_CP_REG_TO_MEM_DEST(stats_sample(aq, stop)));
assert(type < ARRAY_SIZE(batch->pipeline_stats_queries_active));
assert(batch->pipeline_stats_queries_active[type] > 0);
@ -863,7 +863,7 @@ perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
fd_pkt7(cs, CP_REG_TO_MEM, 3)
.add(CP_REG_TO_MEM_0(.reg = counter->counter_reg_lo, ._64b = true))
.add(CP_REG_TO_MEM_DEST(query_sample_idx(aq, i, start)));
.add(A5XX_CP_REG_TO_MEM_DEST(query_sample_idx(aq, i, start)));
}
}
@ -890,7 +890,7 @@ perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
fd_pkt7(cs, CP_REG_TO_MEM, 3)
.add(CP_REG_TO_MEM_0(.reg = counter->counter_reg_lo, ._64b = true))
.add(CP_REG_TO_MEM_DEST(query_sample_idx(aq, i, stop)));
.add(A5XX_CP_REG_TO_MEM_DEST(query_sample_idx(aq, i, stop)));
}
/* and compute the result: */