mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 00:38:48 +02:00
amd: switch to new packet definitions for all packets
The new definitions have their numbers offset by 1 (e.g. S_580 -> S_581). The remaining old definitions are adjusted to match that.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40588>
This commit is contained in:
parent
30f8bbd97b
commit
a7c63ae6fa
32 changed files with 282 additions and 525 deletions
|
|
@ -1020,7 +1020,8 @@ ac_cmdbuf_flush_vgt_streamout(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level
|
|||
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
|
||||
|
||||
ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 3, 0));
|
||||
ac_cmdbuf_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_ENGINE_SEL(V_370_ME));
|
||||
ac_cmdbuf_emit(S_371_DST_SEL(V_371_MEM_MAPPED_REGISTER) |
|
||||
S_371_ENGINE_SEL(V_371_MICRO_ENGINE));
|
||||
ac_cmdbuf_emit(R_0300FC_CP_STRMOUT_CNTL >> 2);
|
||||
ac_cmdbuf_emit(0);
|
||||
ac_cmdbuf_emit(0);
|
||||
|
|
|
|||
|
|
@ -21,9 +21,9 @@ ac_emit_cp_indirect_buffer(struct ac_cmdbuf *cs, uint64_t va, uint32_t cdw,
|
|||
uint32_t dword2_flags = 0;
|
||||
|
||||
if (flags & AC_CP_INDIRECT_BUFFER_CHAIN)
|
||||
dword2_flags |= S_3F2_CHAIN(1);
|
||||
dword2_flags |= S_3F3_CHAIN(1);
|
||||
if (flags & AC_CP_INDIRECT_BUFFER_VALID)
|
||||
dword2_flags |= S_3F2_VALID(1);
|
||||
dword2_flags |= S_3F3_VALID(1);
|
||||
|
||||
ac_cmdbuf_begin(cs);
|
||||
ac_cmdbuf_emit(PKT3(PKT3_INDIRECT_BUFFER, 2, predicate));
|
||||
|
|
@ -60,9 +60,9 @@ ac_emit_cp_write_data_head(struct ac_cmdbuf *cs, uint32_t engine_sel,
|
|||
{
|
||||
ac_cmdbuf_begin(cs);
|
||||
ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 2 + size, predicate));
|
||||
ac_cmdbuf_emit(S_370_DST_SEL(dst_sel) |
|
||||
S_370_WR_CONFIRM(1) |
|
||||
S_370_ENGINE_SEL(engine_sel));
|
||||
ac_cmdbuf_emit(S_371_DST_SEL(dst_sel) |
|
||||
S_371_WR_CONFIRM(1) |
|
||||
S_371_ENGINE_SEL(engine_sel));
|
||||
ac_cmdbuf_emit(va);
|
||||
ac_cmdbuf_emit(va >> 32);
|
||||
ac_cmdbuf_end();
|
||||
|
|
@ -83,7 +83,7 @@ void
|
|||
ac_emit_cp_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel,
|
||||
uint64_t va, uint32_t value)
|
||||
{
|
||||
ac_emit_cp_write_data(cs, engine_sel, V_370_MEM, va, 1, &value, false);
|
||||
ac_emit_cp_write_data(cs, engine_sel, V_371_MEMORY, va, 1, &value, false);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -155,23 +155,23 @@ ac_emit_cp_acquire_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx
|
|||
const bool ts = is_ts_event(event_type);
|
||||
const bool ps_done = event_type == V_028A90_PS_DONE;
|
||||
const bool cs_done = event_type == V_028A90_CS_DONE;
|
||||
const uint32_t counter_sel = ts ? V_580_TS_SELECT : ps_done ? V_580_PS_SELECT : V_580_CS_SELECT;
|
||||
const uint32_t counter_sel = ts ? V_581B_TS_SELECT : ps_done ? V_581B_PS_SELECT : V_581B_CS_SELECT;
|
||||
|
||||
assert((int)ts + (int)cs_done + (int)ps_done == 1);
|
||||
assert(!gcr_cntl || stage_sel == V_580_CP_PFP || stage_sel == V_580_CP_ME);
|
||||
assert(stage_sel != V_580_PRE_COLOR);
|
||||
assert(!gcr_cntl || stage_sel == V_581B_CP_PFP || stage_sel == V_581B_CP_ME);
|
||||
assert(stage_sel != V_581B_PRE_COLOR);
|
||||
|
||||
ac_cmdbuf_begin(cs);
|
||||
ac_cmdbuf_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0));
|
||||
ac_cmdbuf_emit(S_580_PWS_STAGE_SEL(stage_sel) |
|
||||
S_580_PWS_COUNTER_SEL(counter_sel) |
|
||||
S_580_PWS_ENA2(1) |
|
||||
S_580_PWS_COUNT(count));
|
||||
ac_cmdbuf_emit(S_581B_PWS_STAGE_SEL(stage_sel) |
|
||||
S_581B_PWS_COUNTER_SEL(counter_sel) |
|
||||
S_581B_PWS_ENA2(1) |
|
||||
S_581B_PWS_COUNT(count));
|
||||
ac_cmdbuf_emit(0xffffffff); /* GCR_SIZE */
|
||||
ac_cmdbuf_emit(0x01ffffff); /* GCR_SIZE_HI */
|
||||
ac_cmdbuf_emit(0); /* GCR_BASE_LO */
|
||||
ac_cmdbuf_emit(0); /* GCR_BASE_HI */
|
||||
ac_cmdbuf_emit(S_585_PWS_ENA(1));
|
||||
ac_cmdbuf_emit(S_586B_PWS_ENA(1));
|
||||
ac_cmdbuf_emit(gcr_cntl); /* GCR_CNTL (this has no effect if PWS_STAGE_SEL isn't PFP or ME) */
|
||||
ac_cmdbuf_end();
|
||||
}
|
||||
|
|
@ -196,34 +196,34 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx
|
|||
event_type != V_028A90_CS_DONE));
|
||||
|
||||
/* Extract GCR_CNTL fields because the encoding is different in RELEASE_MEM. */
|
||||
assert(G_586_GLI_INV(gcr_cntl) == 0);
|
||||
assert(gfx_level >= GFX12 || G_586_GL1_RANGE(gcr_cntl) == 0);
|
||||
const uint32_t glm_wb = G_586_GLM_WB(gcr_cntl);
|
||||
const uint32_t glm_inv = G_586_GLM_INV(gcr_cntl);
|
||||
const uint32_t glk_wb = G_586_GLK_WB(gcr_cntl);
|
||||
const uint32_t glk_inv = G_586_GLK_INV(gcr_cntl);
|
||||
const uint32_t glv_inv = G_586_GLV_INV(gcr_cntl);
|
||||
const uint32_t gl1_inv = G_586_GL1_INV(gcr_cntl);
|
||||
assert(G_586_GL2_US(gcr_cntl) == 0);
|
||||
assert(G_586_GL2_RANGE(gcr_cntl) == 0);
|
||||
assert(G_586_GL2_DISCARD(gcr_cntl) == 0);
|
||||
const uint32_t gl2_inv = G_586_GL2_INV(gcr_cntl);
|
||||
const uint32_t gl2_wb = G_586_GL2_WB(gcr_cntl);
|
||||
const uint32_t gcr_seq = G_586_SEQ(gcr_cntl);
|
||||
assert(G_587_GLI_INV(gcr_cntl) == 0);
|
||||
assert(gfx_level >= GFX12 || G_587_GL1_RANGE(gcr_cntl) == 0);
|
||||
const uint32_t glm_wb = G_587_GLM_WB(gcr_cntl);
|
||||
const uint32_t glm_inv = G_587_GLM_INV(gcr_cntl);
|
||||
const uint32_t glk_wb = G_587_GLK_WB(gcr_cntl);
|
||||
const uint32_t glk_inv = G_587_GLK_INV(gcr_cntl);
|
||||
const uint32_t glv_inv = G_587_GLV_INV(gcr_cntl);
|
||||
const uint32_t gl1_inv = G_587_GL1_INV(gcr_cntl);
|
||||
assert(G_587_GL2_US(gcr_cntl) == 0);
|
||||
assert(G_587_GL2_RANGE(gcr_cntl) == 0);
|
||||
assert(G_587_GL2_DISCARD(gcr_cntl) == 0);
|
||||
const uint32_t gl2_inv = G_587_GL2_INV(gcr_cntl);
|
||||
const uint32_t gl2_wb = G_587_GL2_WB(gcr_cntl);
|
||||
const uint32_t gcr_seq = G_587_SEQ(gcr_cntl);
|
||||
const bool ts = is_ts_event(event_type);
|
||||
|
||||
ac_cmdbuf_begin(cs);
|
||||
ac_cmdbuf_emit(PKT3(PKT3_RELEASE_MEM, 6, 0));
|
||||
ac_cmdbuf_emit(S_490_EVENT_TYPE(event_type) |
|
||||
S_490_EVENT_INDEX(ts ? 5 : 6) |
|
||||
(gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GL1_INV(gl1_inv)) |
|
||||
S_490_GLV_INV(glv_inv) |
|
||||
S_490_GL2_INV(gl2_inv) |
|
||||
S_490_GL2_WB(gl2_wb) |
|
||||
S_490_SEQ(gcr_seq) |
|
||||
S_490_GLK_WB(glk_wb) |
|
||||
S_490_GLK_INV(glk_inv) |
|
||||
S_490_PWS_ENABLE(1));
|
||||
ac_cmdbuf_emit(S_491_EVENT_TYPE(event_type) |
|
||||
S_491_EVENT_INDEX(ts ? 5 : 6) |
|
||||
(gfx_level >= GFX12 ? 0 : S_491_GLM_WB(glm_wb) | S_491_GLM_INV(glm_inv) | S_491_GL1_INV(gl1_inv)) |
|
||||
S_491_GLV_INV(glv_inv) |
|
||||
S_491_GL2_INV(gl2_inv) |
|
||||
S_491_GL2_WB(gl2_wb) |
|
||||
S_491_SEQ(gcr_seq) |
|
||||
S_491_GLK_WB(glk_wb) |
|
||||
S_491_GLK_INV(glk_inv) |
|
||||
S_491_PWS_ENABLE(1));
|
||||
ac_cmdbuf_emit(0); /* DST_SEL, INT_SEL, DATA_SEL */
|
||||
ac_cmdbuf_emit(0); /* ADDRESS_LO */
|
||||
ac_cmdbuf_emit(0); /* ADDRESS_HI */
|
||||
|
|
@ -403,14 +403,14 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
|||
enum amd_ip_type ip_type, uint32_t engine,
|
||||
uint32_t gcr_cntl)
|
||||
{
|
||||
assert(engine == V_580_CP_PFP || engine == V_580_CP_ME);
|
||||
assert(engine == V_581B_CP_PFP || engine == V_581B_CP_ME);
|
||||
assert(gcr_cntl);
|
||||
|
||||
ac_cmdbuf_begin(cs);
|
||||
|
||||
if (gfx_level >= GFX10) {
|
||||
/* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */
|
||||
const uint32_t engine_flag = engine == V_580_CP_ME ? BITFIELD_BIT(31) : 0;
|
||||
const uint32_t engine_flag = engine == V_581B_CP_ME ? BITFIELD_BIT(31) : 0;
|
||||
|
||||
/* Flush caches. This doesn't wait for idle. */
|
||||
ac_cmdbuf_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0));
|
||||
|
|
|
|||
|
|
@ -253,7 +253,7 @@ static void ac_ib_gather_context_rolls(struct ac_context_roll_ctx *ctx, uint32_t
|
|||
break;
|
||||
|
||||
case PKT3_ACQUIRE_MEM:
|
||||
if (G_580_PWS_ENA2(ib[cur_dw])) {
|
||||
if (G_581B_PWS_ENA2(ib[cur_dw])) {
|
||||
ac_record_wait_idle(ctx);
|
||||
} else {
|
||||
ac_roll_context(ctx);
|
||||
|
|
@ -266,7 +266,7 @@ static void ac_ib_gather_context_rolls(struct ac_context_roll_ctx *ctx, uint32_t
|
|||
break;
|
||||
|
||||
case PKT3_EVENT_WRITE:
|
||||
if (G_490_EVENT_TYPE(ib[cur_dw]) == V_028A90_PS_PARTIAL_FLUSH)
|
||||
if (G_491_EVENT_TYPE(ib[cur_dw]) == V_028A90_PS_PARTIAL_FLUSH)
|
||||
ac_record_wait_idle(ctx);
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -463,11 +463,11 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||
break;
|
||||
case PKT3_CP_DMA:
|
||||
/* GFX6 */
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_415_COMMAND, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_411_CP_DMA_WORD0, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_412_CP_DMA_WORD1, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_413_CP_DMA_WORD2, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_414_CP_DMA_WORD3, ac_ib_get(ib), ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_415_CP_DMA_COMMAND, ac_ib_get(ib), ~0);
|
||||
break;
|
||||
case PKT3_DMA_DATA: {
|
||||
if (ib->gfx_level >= GFX9) {
|
||||
|
|
@ -482,29 +482,29 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||
uint64_t dst_addr = ac_ib_get64(ib);
|
||||
|
||||
uint32_t command = ac_ib_get(ib);
|
||||
uint32_t size = G_415_BYTE_COUNT_GFX6(command);
|
||||
uint32_t size = G_415_BYTE_COUNT(command);
|
||||
|
||||
uint32_t src_sel = G_501_SRC_SEL(header);
|
||||
bool src_mem = (src_sel == V_501_SRC_ADDR && G_415_SAS(command) == V_415_MEMORY) ||
|
||||
src_sel == V_411_SRC_ADDR_TC_L2;
|
||||
src_sel == V_501_SRC_ADDR_USING_L2;
|
||||
|
||||
uint32_t dst_sel = G_501_DST_SEL(header);
|
||||
bool dst_mem = (dst_sel == V_501_DST_ADDR && G_415_DAS(command) == V_415_MEMORY) ||
|
||||
dst_sel == V_411_DST_ADDR_TC_L2;
|
||||
dst_sel == V_501_DST_ADDR_USING_L2;
|
||||
|
||||
print_addr(ib, "SRC_ADDR", src_addr, src_mem ? size : AC_ADDR_SIZE_NOT_MEMORY);
|
||||
print_addr(ib, "DST_ADDR", dst_addr, dst_mem ? size : AC_ADDR_SIZE_NOT_MEMORY);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_415_COMMAND, command, ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_415_CP_DMA_COMMAND, command, ~0);
|
||||
break;
|
||||
}
|
||||
case PKT3_INDIRECT_BUFFER_SI:
|
||||
case PKT3_INDIRECT_BUFFER: {
|
||||
uint32_t base_lo_dw = ac_ib_get(ib);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_3F1_IB_BASE_LO, base_lo_dw, ~0);
|
||||
uint32_t base_hi_dw = ac_ib_get(ib);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_3F1_IB_BASE_HI, base_hi_dw, ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_3F2_IB_BASE_HI, base_hi_dw, ~0);
|
||||
uint32_t control_dw = ac_ib_get(ib);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_3F2_IB_CONTROL, control_dw, ~0);
|
||||
ac_dump_reg(f, ib->gfx_level, ib->family, R_3F3_IB_CONTROL, control_dw, ~0);
|
||||
|
||||
if (!ib->addr_callback)
|
||||
break;
|
||||
|
|
@ -516,9 +516,9 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||
if (!data)
|
||||
break;
|
||||
|
||||
if (G_3F2_CHAIN(control_dw)) {
|
||||
if (G_3F3_CHAIN(control_dw)) {
|
||||
ib->ib = data;
|
||||
ib->num_dw = G_3F2_IB_SIZE(control_dw);
|
||||
ib->num_dw = G_3F3_IB_SIZE(control_dw);
|
||||
ib->cur_dw = 0;
|
||||
return;
|
||||
}
|
||||
|
|
@ -526,7 +526,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
|
|||
struct ac_ib_parser ib_recurse;
|
||||
memcpy(&ib_recurse, ib, sizeof(ib_recurse));
|
||||
ib_recurse.ib = data;
|
||||
ib_recurse.num_dw = G_3F2_IB_SIZE(control_dw);
|
||||
ib_recurse.num_dw = G_3F3_IB_SIZE(control_dw);
|
||||
ib_recurse.cur_dw = 0;
|
||||
if (ib_recurse.trace_id_count) {
|
||||
if (*current_trace_id == *ib->trace_ids) {
|
||||
|
|
|
|||
|
|
@ -3002,9 +3002,9 @@ struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *i
|
|||
* Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory.
|
||||
*/
|
||||
ac_pm4_cmd_add(pm4, PKT3(PKT3_RELEASE_MEM, 6, 0));
|
||||
ac_pm4_cmd_add(pm4, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) |
|
||||
S_490_EVENT_INDEX(5) |
|
||||
S_490_PWS_ENABLE(1));
|
||||
ac_pm4_cmd_add(pm4, S_491_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) |
|
||||
S_491_EVENT_INDEX(5) |
|
||||
S_491_PWS_ENABLE(1));
|
||||
ac_pm4_cmd_add(pm4, 0); /* DST_SEL, INT_SEL, DATA_SEL */
|
||||
ac_pm4_cmd_add(pm4, 0); /* ADDRESS_LO */
|
||||
ac_pm4_cmd_add(pm4, 0); /* ADDRESS_HI */
|
||||
|
|
@ -3012,28 +3012,28 @@ struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *i
|
|||
ac_pm4_cmd_add(pm4, 0); /* DATA_HI */
|
||||
ac_pm4_cmd_add(pm4, 0); /* INT_CTXID */
|
||||
|
||||
unsigned gcr_cntl = S_586_GL2_INV(1) | S_586_GL2_WB(1) |
|
||||
(info->gfx_level >= GFX12 ? 0 : S_586_GLM_INV(1) | S_586_GLM_WB(1) | S_586_GL1_INV(1)) |
|
||||
S_586_GLV_INV(1) |
|
||||
S_586_GLK_INV(1) | S_586_GLI_INV(V_586_GLI_ALL);
|
||||
unsigned gcr_cntl = S_587_GL2_INV(1) | S_587_GL2_WB(1) |
|
||||
(info->gfx_level >= GFX12 ? 0 : S_587_GLM_INV(1) | S_587_GLM_WB(1) | S_587_GL1_INV(1)) |
|
||||
S_587_GLV_INV(1) |
|
||||
S_587_GLK_INV(1) | S_587_GLI_INV(V_587_GLI_ALL);
|
||||
|
||||
/* Wait for the PWS counter. */
|
||||
ac_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
|
||||
ac_pm4_cmd_add(pm4, S_580_PWS_STAGE_SEL(V_580_CP_PFP) |
|
||||
S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) |
|
||||
S_580_PWS_ENA2(1) |
|
||||
S_580_PWS_COUNT(0));
|
||||
ac_pm4_cmd_add(pm4, S_581B_PWS_STAGE_SEL(V_581B_CP_PFP) |
|
||||
S_581B_PWS_COUNTER_SEL(V_581B_TS_SELECT) |
|
||||
S_581B_PWS_ENA2(1) |
|
||||
S_581B_PWS_COUNT(0));
|
||||
ac_pm4_cmd_add(pm4, 0xffffffff); /* GCR_SIZE */
|
||||
ac_pm4_cmd_add(pm4, 0x01ffffff); /* GCR_SIZE_HI */
|
||||
ac_pm4_cmd_add(pm4, 0); /* GCR_BASE_LO */
|
||||
ac_pm4_cmd_add(pm4, 0); /* GCR_BASE_HI */
|
||||
ac_pm4_cmd_add(pm4, S_585_PWS_ENA(1));
|
||||
ac_pm4_cmd_add(pm4, S_586B_PWS_ENA(1));
|
||||
ac_pm4_cmd_add(pm4, gcr_cntl); /* GCR_CNTL */
|
||||
} else if (info->gfx_level >= GFX10) {
|
||||
unsigned gcr_cntl = S_586_GL2_INV(1) | S_586_GL2_WB(1) |
|
||||
S_586_GLM_INV(1) | S_586_GLM_WB(1) |
|
||||
S_586_GL1_INV(1) | S_586_GLV_INV(1) |
|
||||
S_586_GLK_INV(1) | S_586_GLI_INV(V_586_GLI_ALL);
|
||||
unsigned gcr_cntl = S_587_GL2_INV(1) | S_587_GL2_WB(1) |
|
||||
S_587_GLM_INV(1) | S_587_GLM_WB(1) |
|
||||
S_587_GL1_INV(1) | S_587_GLV_INV(1) |
|
||||
S_587_GLK_INV(1) | S_587_GLI_INV(V_587_GLI_ALL);
|
||||
|
||||
ac_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
|
||||
ac_pm4_cmd_add(pm4, 0); /* CP_COHER_CNTL */
|
||||
|
|
|
|||
|
|
@ -1812,10 +1812,10 @@ ac_emit_spm_muxsel(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
|||
|
||||
/* Write the muxsel line configuration with MUXSEL_DATA. */
|
||||
ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
|
||||
ac_cmdbuf_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
|
||||
S_370_WR_CONFIRM(1) |
|
||||
S_370_ENGINE_SEL(V_370_ME) |
|
||||
S_370_WR_ONE_ADDR(1));
|
||||
ac_cmdbuf_emit(S_371_DST_SEL(V_371_MEM_MAPPED_REGISTER) |
|
||||
S_371_WR_CONFIRM(V_371_WAIT_FOR_WRITE_CONFIRMATION) |
|
||||
S_371_ENGINE_SEL(V_371_MICRO_ENGINE) |
|
||||
S_371_ADDR_INCR(V_371_DO_NOT_INCREMENT_ADDRESS));
|
||||
ac_cmdbuf_emit(rlc_muxsel_data >> 2);
|
||||
ac_cmdbuf_emit(0);
|
||||
ac_cmdbuf_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE);
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ packet_field_register_map = {
|
|||
# (name, first_bit): (register, mask)
|
||||
('COHER_CNTL', 0): ('R_0301F0_CP_COHER_CNTL', ~0),
|
||||
('EVENT_TYPE', 0): ('R_028A90_VGT_EVENT_INITIATOR', 0x3F),
|
||||
('GCR_CNTL', 0): ('R_586_GCR_CNTL', ~0),
|
||||
('GCR_CNTL', 0): ('R_587_GCR_CNTL', ~0),
|
||||
('DISPATCH_INITIATOR', 0): ('R_00B800_COMPUTE_DISPATCH_INITIATOR', ~0),
|
||||
('DRAW_INITIATOR', 0): ('R_0287F0_VGT_DRAW_INITIATOR', ~0),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,19 @@
|
|||
{
|
||||
"comment": "This file is only for definitions not present in the generated packet headers.",
|
||||
"enums": {
|
||||
"COMMAND__SAIC": {
|
||||
"CP_DMA_COMMAND__SAIC": {
|
||||
"entries": [
|
||||
{"name": "INCREMENT", "value": 0},
|
||||
{"name": "NO_INCREMENT", "value": 1}
|
||||
]
|
||||
},
|
||||
"COMMAND__SAS": {
|
||||
"CP_DMA_COMMAND__SAS": {
|
||||
"entries": [
|
||||
{"name": "MEMORY", "value": 0},
|
||||
{"name": "REGISTER", "value": 1}
|
||||
]
|
||||
},
|
||||
"COMMAND__SRC_SWAP": {
|
||||
"CP_DMA_COMMAND__SRC_SWAP": {
|
||||
"entries": [
|
||||
{"name": "NONE", "value": 0},
|
||||
{"name": "8_IN_16", "value": 1},
|
||||
|
|
@ -20,31 +21,9 @@
|
|||
{"name": "8_IN_64", "value": 3}
|
||||
]
|
||||
},
|
||||
"CONTROL__DST_SEL": {
|
||||
"WRITE_DATA_CONTROL__DST_SEL": {
|
||||
"entries": [
|
||||
{"name": "MEM_MAPPED_REGISTER", "value": 0},
|
||||
{"comment": "sync across GRBM", "name": "MEM_GRBM", "value": 1},
|
||||
{"name": "TC_L2", "value": 2},
|
||||
{"name": "GDS", "value": 3},
|
||||
{"name": "RESERVED", "value": 4}
|
||||
]
|
||||
},
|
||||
"CONTROL__DST_SEL_cik": {
|
||||
"entries": [
|
||||
{"name": "MEM_MAPPED_REGISTER", "value": 0},
|
||||
{"comment": "sync across GRBM", "name": "MEM_GRBM", "value": 1},
|
||||
{"name": "TC_L2", "value": 2},
|
||||
{"name": "GDS", "value": 3},
|
||||
{"name": "RESERVED", "value": 4},
|
||||
{"name": "MEM", "value": 5}
|
||||
]
|
||||
},
|
||||
"CONTROL__ENGINE_SEL": {
|
||||
"entries": [
|
||||
{"name": "ME", "value": 0},
|
||||
{"name": "PFP", "value": 1},
|
||||
{"name": "CE", "value": 2},
|
||||
{"name": "DE", "value": 3}
|
||||
{"comment": "sync across GRBM", "name": "MEM_GRBM", "value": 1}
|
||||
]
|
||||
},
|
||||
"CP_DMA_WORD1__DST_SEL": {
|
||||
|
|
@ -60,14 +39,6 @@
|
|||
{"name": "DST_ADDR_TC_L2", "value": 3}
|
||||
]
|
||||
},
|
||||
"CP_DMA_WORD1__DST_SEL_gfx9": {
|
||||
"entries": [
|
||||
{"name": "DST_ADDR", "value": 0},
|
||||
{"comment": "program DAS to 1 as well", "name": "GDS", "value": 1},
|
||||
{"name": "NOWHERE", "value": 2},
|
||||
{"name": "DST_ADDR_TC_L2", "value": 3}
|
||||
]
|
||||
},
|
||||
"CP_DMA_WORD1__ENGINE": {
|
||||
"entries": [
|
||||
{"name": "ME", "value": 0},
|
||||
|
|
@ -118,152 +89,48 @@
|
|||
{"name": "SEQ_FORWARD", "value": 1},
|
||||
{"name": "SEQ_REVERSE", "value": 2}
|
||||
]
|
||||
},
|
||||
"PWS_STAGE_SEL": {
|
||||
"entries": [
|
||||
{"name": "PRE_DEPTH", "value": 0},
|
||||
{"name": "PRE_SHADER", "value": 1},
|
||||
{"name": "PRE_COLOR", "value": 2},
|
||||
{"name": "PRE_PIX_SHADER", "value": 3},
|
||||
{"name": "CP_PFP", "value": 4},
|
||||
{"name": "CP_ME", "value": 5}
|
||||
]
|
||||
},
|
||||
"PWS_COUNTER_SEL": {
|
||||
"entries": [
|
||||
{"name": "TS_SELECT", "value": 0},
|
||||
{"name": "PS_SELECT", "value": 1},
|
||||
{"name": "CS_SELECT", "value": 2}
|
||||
]
|
||||
},
|
||||
"VGT_EVENT_TYPE_gfx11": {
|
||||
"entries": [
|
||||
{"name": "Reserved_0x00", "value": 0},
|
||||
{"name": "SAMPLE_STREAMOUTSTATS1", "value": 1},
|
||||
{"name": "SAMPLE_STREAMOUTSTATS2", "value": 2},
|
||||
{"name": "SAMPLE_STREAMOUTSTATS3", "value": 3},
|
||||
{"name": "CACHE_FLUSH_TS", "value": 4},
|
||||
{"name": "CONTEXT_DONE", "value": 5},
|
||||
{"name": "CACHE_FLUSH", "value": 6},
|
||||
{"name": "CS_PARTIAL_FLUSH", "value": 7},
|
||||
{"name": "VGT_STREAMOUT_SYNC", "value": 8},
|
||||
{"name": "Reserved_0x09", "value": 9},
|
||||
{"name": "VGT_STREAMOUT_RESET", "value": 10},
|
||||
{"name": "END_OF_PIPE_INCR_DE", "value": 11},
|
||||
{"name": "END_OF_PIPE_IB_END", "value": 12},
|
||||
{"name": "RST_PIX_CNT", "value": 13},
|
||||
{"name": "BREAK_BATCH", "value": 14},
|
||||
{"name": "VS_PARTIAL_FLUSH", "value": 15},
|
||||
{"name": "PS_PARTIAL_FLUSH", "value": 16},
|
||||
{"name": "FLUSH_HS_OUTPUT", "value": 17},
|
||||
{"name": "FLUSH_DFSM", "value": 18},
|
||||
{"name": "RESET_TO_LOWEST_VGT", "value": 19},
|
||||
{"name": "CACHE_FLUSH_AND_INV_TS_EVENT", "value": 20},
|
||||
{"name": "WAIT_SYNC", "value": 21},
|
||||
{"name": "CACHE_FLUSH_AND_INV_EVENT", "value": 22},
|
||||
{"name": "PERFCOUNTER_START", "value": 23},
|
||||
{"name": "PERFCOUNTER_STOP", "value": 24},
|
||||
{"name": "PIPELINESTAT_START", "value": 25},
|
||||
{"name": "PIPELINESTAT_STOP", "value": 26},
|
||||
{"name": "PERFCOUNTER_SAMPLE", "value": 27},
|
||||
{"name": "FLUSH_ES_OUTPUT", "value": 28},
|
||||
{"name": "BIN_CONF_OVERRIDE_CHECK", "value": 29},
|
||||
{"name": "SAMPLE_PIPELINESTAT", "value": 30},
|
||||
{"name": "SO_VGTSTREAMOUT_FLUSH", "value": 31},
|
||||
{"name": "SAMPLE_STREAMOUTSTATS", "value": 32},
|
||||
{"name": "RESET_VTX_CNT", "value": 33},
|
||||
{"name": "BLOCK_CONTEXT_DONE", "value": 34},
|
||||
{"name": "CS_CONTEXT_DONE", "value": 35},
|
||||
{"name": "VGT_FLUSH", "value": 36},
|
||||
{"name": "TGID_ROLLOVER", "value": 37},
|
||||
{"name": "SQ_NON_EVENT", "value": 38},
|
||||
{"name": "SC_SEND_DB_VPZ", "value": 39},
|
||||
{"name": "BOTTOM_OF_PIPE_TS", "value": 40},
|
||||
{"name": "FLUSH_SX_TS", "value": 41},
|
||||
{"name": "DB_CACHE_FLUSH_AND_INV", "value": 42},
|
||||
{"name": "FLUSH_AND_INV_DB_DATA_TS", "value": 43},
|
||||
{"name": "FLUSH_AND_INV_DB_META", "value": 44},
|
||||
{"name": "FLUSH_AND_INV_CB_DATA_TS", "value": 45},
|
||||
{"name": "FLUSH_AND_INV_CB_META", "value": 46},
|
||||
{"name": "CS_DONE", "value": 47},
|
||||
{"name": "PS_DONE", "value": 48},
|
||||
{"name": "FLUSH_AND_INV_CB_PIXEL_DATA", "value": 49},
|
||||
{"name": "SX_CB_RAT_ACK_REQUEST", "value": 50},
|
||||
{"name": "THREAD_TRACE_START", "value": 51},
|
||||
{"name": "THREAD_TRACE_STOP", "value": 52},
|
||||
{"name": "THREAD_TRACE_MARKER", "value": 53},
|
||||
{"name": "THREAD_TRACE_DRAW", "value": 54},
|
||||
{"name": "THREAD_TRACE_FINISH", "value": 55},
|
||||
{"name": "PIXEL_PIPE_STAT_CONTROL", "value": 56},
|
||||
{"name": "PIXEL_PIPE_STAT_DUMP", "value": 57},
|
||||
{"name": "PIXEL_PIPE_STAT_RESET", "value": 58},
|
||||
{"name": "CONTEXT_SUSPEND", "value": 59},
|
||||
{"name": "OFFCHIP_HS_DEALLOC", "value": 60},
|
||||
{"name": "ENABLE_NGG_PIPELINE", "value": 61},
|
||||
{"name": "ENABLE_LEGACY_PIPELINE", "value": 62},
|
||||
{"name": "DRAW_DONE", "value": 63}
|
||||
]
|
||||
}
|
||||
},
|
||||
"register_mappings": [
|
||||
{
|
||||
"comment": "This is at offset 0x415 instead of 0x414 due to a conflict with SQ_WAVE_GPR_ALLOC",
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81"],
|
||||
"map": {"at": 1045, "to": "pkt3"},
|
||||
"name": "COMMAND",
|
||||
"type_ref": "COMMAND"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1045, "to": "pkt3"},
|
||||
"name": "COMMAND",
|
||||
"type_ref": "COMMAND_gfx9"
|
||||
"name": "CP_DMA_COMMAND",
|
||||
"type_ref": "CP_DMA_COMMAND"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6"],
|
||||
"map": {"at": 880, "to": "pkt3"},
|
||||
"name": "CONTROL",
|
||||
"type_ref": "CONTROL"
|
||||
"map": {"at": 881, "to": "pkt3"},
|
||||
"name": "WRITE_DATA_CONTROL",
|
||||
"type_ref": "WRITE_DATA_CONTROL"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 880, "to": "pkt3"},
|
||||
"name": "CONTROL",
|
||||
"type_ref": "CONTROL_cik"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1040, "to": "pkt3"},
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81"],
|
||||
"map": {"at": 1041, "to": "pkt3"},
|
||||
"name": "CP_DMA_WORD0",
|
||||
"type_ref": "CP_DMA_WORD0"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6"],
|
||||
"map": {"at": 1041, "to": "pkt3"},
|
||||
"map": {"at": 1042, "to": "pkt3"},
|
||||
"name": "CP_DMA_WORD1",
|
||||
"type_ref": "CP_DMA_WORD1"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx7", "gfx8", "gfx81"],
|
||||
"map": {"at": 1041, "to": "pkt3"},
|
||||
"map": {"at": 1042, "to": "pkt3"},
|
||||
"name": "CP_DMA_WORD1",
|
||||
"type_ref": "CP_DMA_WORD1_cik"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1041, "to": "pkt3"},
|
||||
"name": "CP_DMA_WORD1",
|
||||
"type_ref": "CP_DMA_WORD1_gfx9"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1042, "to": "pkt3"},
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81"],
|
||||
"map": {"at": 1043, "to": "pkt3"},
|
||||
"name": "CP_DMA_WORD2",
|
||||
"type_ref": "CP_DMA_WORD2"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1043, "to": "pkt3"},
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81"],
|
||||
"map": {"at": 1044, "to": "pkt3"},
|
||||
"name": "CP_DMA_WORD3",
|
||||
"type_ref": "CP_DMA_WORD3"
|
||||
},
|
||||
|
|
@ -279,128 +146,58 @@
|
|||
"name": "DMA_DATA_WORD0",
|
||||
"type_ref": "DMA_DATA_WORD0_cik"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1281, "to": "pkt3"},
|
||||
"name": "DMA_DATA_WORD0",
|
||||
"type_ref": "DMA_DATA_WORD0_gfx9"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 882, "to": "pkt3"},
|
||||
"name": "DST_ADDR_HI"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1286, "to": "pkt3"},
|
||||
"name": "DST_ADDR_HI"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 881, "to": "pkt3"},
|
||||
"name": "DST_ADDR_LO"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1285, "to": "pkt3"},
|
||||
"name": "DST_ADDR_LO"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1414, "to": "pkt3"},
|
||||
"map": {"at": 1415, "to": "pkt3"},
|
||||
"name": "GCR_CNTL",
|
||||
"type_ref": "GCR_CNTL"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1009, "to": "pkt3"},
|
||||
"map": {"at": 1010, "to": "pkt3"},
|
||||
"name": "IB_BASE_HI"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1008, "to": "pkt3"},
|
||||
"map": {"at": 1009, "to": "pkt3"},
|
||||
"name": "IB_BASE_LO"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1010, "to": "pkt3"},
|
||||
"map": {"at": 1011, "to": "pkt3"},
|
||||
"name": "IB_CONTROL",
|
||||
"type_ref": "IB_CONTROL"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx10", "gfx103"],
|
||||
"map": {"at": 1168, "to": "pkt3"},
|
||||
"map": {"at": 1169, "to": "pkt3"},
|
||||
"name": "RELEASE_MEM_OP",
|
||||
"type_ref": "RELEASE_MEM_OP"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx11", "gfx12"],
|
||||
"map": {"at": 1168, "to": "pkt3"},
|
||||
"map": {"at": 1169, "to": "pkt3"},
|
||||
"name": "RELEASE_MEM_OP",
|
||||
"type_ref": "RELEASE_MEM_OP_gfx11"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1283, "to": "pkt3"},
|
||||
"name": "SRC_ADDR_HI"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"],
|
||||
"map": {"at": 1282, "to": "pkt3"},
|
||||
"name": "SRC_ADDR_LO"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx11", "gfx12"],
|
||||
"map": {"at": 1408, "to": "pkt3"},
|
||||
"name": "ACQUIRE_MEM_PWS_2",
|
||||
"type_ref": "ACQUIRE_MEM_PWS_2"
|
||||
},
|
||||
{
|
||||
"chips": ["gfx11", "gfx12"],
|
||||
"map": {"at": 1413, "to": "pkt3"},
|
||||
"name": "ACQUIRE_MEM_PWS_7",
|
||||
"type_ref": "ACQUIRE_MEM_PWS_7"
|
||||
}
|
||||
],
|
||||
"register_types": {
|
||||
"COMMAND": {
|
||||
"CP_DMA_COMMAND": {
|
||||
"fields": [
|
||||
{"bits": [0, 20], "name": "BYTE_COUNT"},
|
||||
{"bits": [21, 21], "name": "DISABLE_WR_CONFIRM"},
|
||||
{"bits": [22, 23], "enum_ref": "COMMAND__SRC_SWAP", "name": "SRC_SWAP"},
|
||||
{"bits": [24, 25], "enum_ref": "COMMAND__SRC_SWAP", "name": "DST_SWAP"},
|
||||
{"bits": [26, 26], "enum_ref": "COMMAND__SAS", "name": "SAS"},
|
||||
{"bits": [27, 27], "enum_ref": "COMMAND__SAS", "name": "DAS"},
|
||||
{"bits": [28, 28], "enum_ref": "COMMAND__SAIC", "name": "SAIC"},
|
||||
{"bits": [29, 29], "enum_ref": "COMMAND__SAIC", "name": "DAIC"},
|
||||
{"bits": [22, 23], "enum_ref": "CP_DMA_COMMAND__SRC_SWAP", "name": "SRC_SWAP"},
|
||||
{"bits": [24, 25], "enum_ref": "CP_DMA_COMMAND__SRC_SWAP", "name": "DST_SWAP"},
|
||||
{"bits": [26, 26], "enum_ref": "CP_DMA_COMMAND__SAS", "name": "SAS"},
|
||||
{"bits": [27, 27], "enum_ref": "CP_DMA_COMMAND__SAS", "name": "DAS"},
|
||||
{"bits": [28, 28], "enum_ref": "CP_DMA_COMMAND__SAIC", "name": "SAIC"},
|
||||
{"bits": [29, 29], "enum_ref": "CP_DMA_COMMAND__SAIC", "name": "DAIC"},
|
||||
{"bits": [30, 30], "name": "RAW_WAIT"}
|
||||
]
|
||||
},
|
||||
"COMMAND_gfx9": {
|
||||
"WRITE_DATA_CONTROL": {
|
||||
"fields": [
|
||||
{"bits": [0, 25], "name": "BYTE_COUNT"},
|
||||
{"bits": [26, 26], "enum_ref": "COMMAND__SAS", "name": "SAS"},
|
||||
{"bits": [27, 27], "enum_ref": "COMMAND__SAS", "name": "DAS"},
|
||||
{"bits": [28, 28], "enum_ref": "COMMAND__SAIC", "name": "SAIC"},
|
||||
{"bits": [29, 29], "enum_ref": "COMMAND__SAIC", "name": "DAIC"},
|
||||
{"bits": [30, 30], "name": "RAW_WAIT"},
|
||||
{"bits": [31, 31], "name": "DISABLE_WR_CONFIRM"}
|
||||
]
|
||||
},
|
||||
"CONTROL": {
|
||||
"fields": [
|
||||
{"bits": [8, 11], "enum_ref": "CONTROL__DST_SEL", "name": "DST_SEL"},
|
||||
{"bits": [16, 16], "name": "WR_ONE_ADDR"},
|
||||
{"bits": [20, 20], "name": "WR_CONFIRM"},
|
||||
{"bits": [30, 31], "enum_ref": "CONTROL__ENGINE_SEL", "name": "ENGINE_SEL"}
|
||||
]
|
||||
},
|
||||
"CONTROL_cik": {
|
||||
"fields": [
|
||||
{"bits": [8, 11], "enum_ref": "CONTROL__DST_SEL_cik", "name": "DST_SEL"},
|
||||
{"bits": [16, 16], "name": "WR_ONE_ADDR"},
|
||||
{"bits": [20, 20], "name": "WR_CONFIRM"},
|
||||
{"bits": [30, 31], "enum_ref": "CONTROL__ENGINE_SEL", "name": "ENGINE_SEL"}
|
||||
{"bits": [8, 11], "enum_ref": "WRITE_DATA_CONTROL__DST_SEL", "name": "DST_SEL"}
|
||||
]
|
||||
},
|
||||
"CP_DMA_WORD0": {
|
||||
|
|
@ -426,15 +223,6 @@
|
|||
{"bits": [31, 31], "name": "CP_SYNC"}
|
||||
]
|
||||
},
|
||||
"CP_DMA_WORD1_gfx9": {
|
||||
"fields": [
|
||||
{"bits": [0, 15], "name": "SRC_ADDR_HI"},
|
||||
{"bits": [20, 21], "enum_ref": "CP_DMA_WORD1__DST_SEL_gfx9", "name": "DST_SEL"},
|
||||
{"bits": [27, 27], "enum_ref": "CP_DMA_WORD1__ENGINE", "name": "ENGINE"},
|
||||
{"bits": [29, 30], "enum_ref": "CP_DMA_WORD1__SRC_SEL_cik", "name": "SRC_SEL"},
|
||||
{"bits": [31, 31], "name": "CP_SYNC"}
|
||||
]
|
||||
},
|
||||
"CP_DMA_WORD2": {
|
||||
"fields": [
|
||||
{"bits": [0, 31], "name": "DST_ADDR_LO"}
|
||||
|
|
@ -463,16 +251,6 @@
|
|||
{"bits": [31, 31], "name": "CP_SYNC"}
|
||||
]
|
||||
},
|
||||
"DMA_DATA_WORD0_gfx9": {
|
||||
"fields": [
|
||||
{"bits": [0, 0], "enum_ref": "CP_DMA_WORD1__ENGINE", "name": "ENGINE"},
|
||||
{"bits": [13, 14], "name": "SRC_CACHE_POLICY"},
|
||||
{"bits": [20, 21], "enum_ref": "CP_DMA_WORD1__DST_SEL_gfx9", "name": "DST_SEL"},
|
||||
{"bits": [25, 26], "name": "DST_CACHE_POLICY"},
|
||||
{"bits": [29, 30], "enum_ref": "CP_DMA_WORD1__SRC_SEL_cik", "name": "SRC_SEL"},
|
||||
{"bits": [31, 31], "name": "CP_SYNC"}
|
||||
]
|
||||
},
|
||||
"GCR_CNTL": {
|
||||
"fields": [
|
||||
{"bits": [0, 1], "enum_ref": "GCR_GLI_INV", "name": "GLI_INV"},
|
||||
|
|
@ -502,8 +280,6 @@
|
|||
},
|
||||
"RELEASE_MEM_OP": {
|
||||
"fields": [
|
||||
{"bits": [0, 5], "name": "EVENT_TYPE"},
|
||||
{"bits": [8, 11], "name": "EVENT_INDEX"},
|
||||
{"bits": [12, 12], "name": "GLM_WB"},
|
||||
{"bits": [13, 13], "name": "GLM_INV"},
|
||||
{"bits": [14, 14], "name": "GLV_INV"},
|
||||
|
|
@ -518,9 +294,6 @@
|
|||
},
|
||||
"RELEASE_MEM_OP_gfx11": {
|
||||
"fields": [
|
||||
{"bits": [0, 5], "enum_ref": "VGT_EVENT_TYPE_gfx11", "name": "EVENT_TYPE"},
|
||||
{"bits": [7, 7], "name": "WAIT_SYNC"},
|
||||
{"bits": [8, 11], "name": "EVENT_INDEX"},
|
||||
{"bits": [12, 12], "name": "GLM_WB"},
|
||||
{"bits": [13, 13], "name": "GLM_INV"},
|
||||
{"bits": [14, 14], "name": "GLV_INV"},
|
||||
|
|
@ -531,24 +304,7 @@
|
|||
{"bits": [20, 20], "name": "GL2_INV"},
|
||||
{"bits": [21, 21], "name": "GL2_WB"},
|
||||
{"bits": [22, 23], "enum_ref": "GCR_SEQ", "name": "SEQ"},
|
||||
{"bits": [24, 24], "name": "GLK_WB"},
|
||||
{"bits": [25, 26], "name": "CACHE_POLICY"},
|
||||
{"bits": [28, 29], "name": "EXECUTE"},
|
||||
{"bits": [30, 30], "name": "GLK_INV"},
|
||||
{"bits": [31, 31], "name": "PWS_ENABLE"}
|
||||
]
|
||||
},
|
||||
"ACQUIRE_MEM_PWS_2": {
|
||||
"fields": [
|
||||
{"bits": [11, 13], "enum_ref": "PWS_STAGE_SEL", "name": "PWS_STAGE_SEL"},
|
||||
{"bits": [14, 15], "enum_ref": "PWS_COUNTER_SEL", "name": "PWS_COUNTER_SEL"},
|
||||
{"bits": [17, 17], "name": "PWS_ENA2"},
|
||||
{"bits": [18, 23], "name": "PWS_COUNT"}
|
||||
]
|
||||
},
|
||||
"ACQUIRE_MEM_PWS_7": {
|
||||
"fields": [
|
||||
{"bits": [31, 31], "name": "PWS_ENA"}
|
||||
{"bits": [24, 24], "name": "GLK_WB"}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -437,7 +437,7 @@ radv_update_memory_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const voi
|
|||
radv_emit_cache_flush(cmd_buffer);
|
||||
radeon_check_space(device->ws, cs->b, words + 4);
|
||||
|
||||
ac_emit_cp_write_data(cs->b, V_370_ME, mec ? V_370_MEM : V_370_MEM_GRBM, va, words, data, false);
|
||||
ac_emit_cp_write_data(cs->b, V_371_MICRO_ENGINE, mec ? V_371_MEMORY : V_371_MEM_GRBM, va, words, data, false);
|
||||
|
||||
if (radv_device_fault_detection_enabled(device))
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
|
|
|
|||
|
|
@ -1439,7 +1439,7 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
|
|||
va += offsetof(struct radv_trace_data, secondary_id);
|
||||
|
||||
++cmd_buffer->state.trace_id;
|
||||
radv_write_data(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id, false);
|
||||
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 1, &cmd_buffer->state.trace_id, false);
|
||||
|
||||
radeon_check_space(device->ws, cs->b, 2);
|
||||
|
||||
|
|
@ -1666,10 +1666,10 @@ radv_gang_finalize(struct radv_cmd_buffer *cmd_buffer)
|
|||
const uint32_t zero = 0;
|
||||
|
||||
/* Follower: write 0 to the leader->follower semaphore. */
|
||||
radv_cs_write_data(device, ace_cs, V_370_ME, leader2follower_va, 1, &zero, false);
|
||||
radv_cs_write_data(device, ace_cs, V_371_MICRO_ENGINE, leader2follower_va, 1, &zero, false);
|
||||
|
||||
/* Leader: write 0 to the follower->leader semaphore. */
|
||||
radv_write_data(cmd_buffer, V_370_ME, follower2leader_va, 1, &zero, false);
|
||||
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, follower2leader_va, 1, &zero, false);
|
||||
}
|
||||
|
||||
return radv_finalize_cmd_stream(device, cmd_buffer->gang.cs);
|
||||
|
|
@ -1747,7 +1747,7 @@ radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pip
|
|||
data[0] = pipeline_address;
|
||||
data[1] = pipeline_address >> 32;
|
||||
|
||||
radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
|
||||
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 2, data, false);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1762,7 +1762,7 @@ radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr
|
|||
data[0] = vb_ptr;
|
||||
data[1] = vb_ptr >> 32;
|
||||
|
||||
radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
|
||||
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 2, data, false);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1778,7 +1778,7 @@ radv_save_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader
|
|||
data[0] = prolog_address;
|
||||
data[1] = prolog_address >> 32;
|
||||
|
||||
radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
|
||||
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 2, data, false);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1794,7 +1794,7 @@ radv_save_ps_epilog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader
|
|||
data[0] = epilog_address;
|
||||
data[1] = epilog_address >> 32;
|
||||
|
||||
radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false);
|
||||
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 2, data, false);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1824,7 +1824,7 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bi
|
|||
data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
|
||||
}
|
||||
|
||||
radv_write_data(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data, false);
|
||||
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, MAX_SETS * 2, data, false);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -5048,7 +5048,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image
|
|||
|
||||
/* Use the fastest way when both aspects are used. */
|
||||
ASSERTED unsigned cdw_end =
|
||||
radv_cs_write_data_head(device, cs, V_370_ME, va, 2 * level_count, cmd_buffer->state.predicating);
|
||||
radv_cs_write_data_head(device, cs, V_371_MICRO_ENGINE, va, 2 * level_count, cmd_buffer->state.predicating);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -5073,7 +5073,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image
|
|||
value = ds_clear_value.stencil;
|
||||
}
|
||||
|
||||
radv_write_data(cmd_buffer, V_370_ME, va, 1, &value, cmd_buffer->state.predicating);
|
||||
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 1, &value, cmd_buffer->state.predicating);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -5096,7 +5096,7 @@ radv_update_hiz_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
|
|||
const uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
|
||||
|
||||
ASSERTED unsigned cdw_end =
|
||||
radv_cs_write_data_head(device, cs, V_370_PFP, va, level_count, cmd_buffer->state.predicating);
|
||||
radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, level_count, cmd_buffer->state.predicating);
|
||||
|
||||
radeon_begin(cs);
|
||||
for (uint32_t l = 0; l < level_count; l++)
|
||||
|
|
@ -5123,7 +5123,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct ra
|
|||
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
|
||||
|
||||
ASSERTED unsigned cdw_end =
|
||||
radv_cs_write_data_head(device, cs, V_370_PFP, va, level_count, cmd_buffer->state.predicating);
|
||||
radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, level_count, cmd_buffer->state.predicating);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -5224,7 +5224,7 @@ radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
|
|||
uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel);
|
||||
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
|
||||
|
||||
ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, V_370_PFP, va, 2 * level_count, false);
|
||||
ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, 2 * level_count, false);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -5256,7 +5256,7 @@ radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *
|
|||
|
||||
assert(radv_dcc_enabled(image, range->baseMipLevel));
|
||||
|
||||
ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, V_370_PFP, va, 2 * level_count, false);
|
||||
ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, 2 * level_count, false);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -5314,7 +5314,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im
|
|||
uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel);
|
||||
|
||||
ASSERTED unsigned cdw_end =
|
||||
radv_cs_write_data_head(device, cs, V_370_ME, va, 2 * level_count, cmd_buffer->state.predicating);
|
||||
radv_cs_write_data_head(device, cs, V_371_MICRO_ENGINE, va, 2 * level_count, cmd_buffer->state.predicating);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
@ -7792,7 +7792,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
|
|||
cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
|
||||
cmd_buffer->gfx9_fence_va += fence_offset;
|
||||
|
||||
radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_fence_va, 8);
|
||||
radv_emit_clear_data(cmd_buffer, V_371_PREFETCH_PARSER, cmd_buffer->gfx9_fence_va, 8);
|
||||
|
||||
if (pdev->info.gfx_level == GFX9) {
|
||||
/* Allocate a buffer for the EOP bug on GFX9. */
|
||||
|
|
@ -7805,7 +7805,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
|
|||
cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
|
||||
cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
|
||||
|
||||
radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_eop_bug_va, 16 * num_db);
|
||||
radv_emit_clear_data(cmd_buffer, V_371_PREFETCH_PARSER, cmd_buffer->gfx9_eop_bug_va, 16 * num_db);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -10653,7 +10653,7 @@ radv_gfx12_emit_wa(const struct radv_device *device, const struct radv_cmd_state
|
|||
assert(pdev->info.gfx_level == GFX12);
|
||||
radeon_begin(cs);
|
||||
radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0));
|
||||
radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5));
|
||||
radeon_emit(S_491_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_491_EVENT_INDEX(5));
|
||||
radeon_emit(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */
|
||||
radeon_emit(0); /* ADDRESS_LO */
|
||||
radeon_emit(0); /* ADDRESS_HI */
|
||||
|
|
@ -15246,10 +15246,10 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipe
|
|||
|
||||
if (!(stageMask & ~top_of_pipe_flags) && cmd_buffer->qf != RADV_QUEUE_COMPUTE) {
|
||||
/* Just need to sync the PFP engine. */
|
||||
radv_write_data(cmd_buffer, V_370_PFP, va, 1, &value, false);
|
||||
radv_write_data(cmd_buffer, V_371_PREFETCH_PARSER, va, 1, &value, false);
|
||||
} else if (!(stageMask & ~post_index_fetch_flags)) {
|
||||
/* Sync ME because PFP reads index and indirect buffers. */
|
||||
radv_write_data(cmd_buffer, V_370_ME, va, 1, &value, false);
|
||||
radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 1, &value, false);
|
||||
} else {
|
||||
unsigned event_type;
|
||||
|
||||
|
|
|
|||
|
|
@ -34,9 +34,7 @@
|
|||
static inline unsigned
|
||||
cp_dma_max_byte_count(enum amd_gfx_level gfx_level)
|
||||
{
|
||||
unsigned max = gfx_level >= GFX11 ? 32767
|
||||
: gfx_level >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u)
|
||||
: S_415_BYTE_COUNT_GFX6(~0u);
|
||||
unsigned max = gfx_level >= GFX11 ? 32767 : gfx_level >= GFX9 ? S_506_BYTE_COUNT(~0u) : S_415_BYTE_COUNT(~0u);
|
||||
|
||||
/* make it aligned for optimal performance */
|
||||
return max & ~(SI_CPDMA_ALIGNMENT - 1);
|
||||
|
|
@ -61,25 +59,25 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool
|
|||
|
||||
radeon_check_space(device->ws, cs->b, 9);
|
||||
if (pdev->info.gfx_level >= GFX9)
|
||||
command |= S_415_BYTE_COUNT_GFX9(size);
|
||||
command |= S_506_BYTE_COUNT(size);
|
||||
else
|
||||
command |= S_415_BYTE_COUNT_GFX6(size);
|
||||
command |= S_415_BYTE_COUNT(size);
|
||||
|
||||
/* Sync flags. */
|
||||
if (flags & CP_DMA_SYNC)
|
||||
header |= S_411_CP_SYNC(1);
|
||||
header |= S_501_CP_SYNC(1);
|
||||
|
||||
if (flags & CP_DMA_RAW_WAIT)
|
||||
command |= S_415_RAW_WAIT(1);
|
||||
command |= S_506_RAW_WAIT(1);
|
||||
|
||||
/* Src and dst flags. */
|
||||
if (cp_dma_tc_l2_flag)
|
||||
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
|
||||
header |= S_501_DST_SEL(V_501_DST_ADDR_USING_L2);
|
||||
|
||||
if (flags & CP_DMA_CLEAR)
|
||||
header |= S_411_SRC_SEL(V_411_DATA);
|
||||
header |= S_501_SRC_SEL(V_501_DATA);
|
||||
else if (cp_dma_tc_l2_flag)
|
||||
header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
|
||||
header |= S_501_SRC_SEL(V_501_SRC_ADDR_USING_L2);
|
||||
|
||||
radeon_begin(cs);
|
||||
if (pdev->info.gfx_level >= GFX7) {
|
||||
|
|
@ -92,7 +90,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool
|
|||
radeon_emit(command);
|
||||
} else {
|
||||
assert(!cp_dma_tc_l2_flag);
|
||||
header |= S_411_SRC_ADDR_HI(src_va >> 32);
|
||||
header |= S_412_SRC_ADDR_HI(src_va >> 32);
|
||||
radeon_emit(PKT3(PKT3_CP_DMA, 4, predicating));
|
||||
radeon_emit(src_va); /* SRC_ADDR_LO [31:0] */
|
||||
radeon_emit(header); /* SRC_ADDR_HI [15:0] + flags. */
|
||||
|
|
@ -168,14 +166,14 @@ radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radv_cmd_stream
|
|||
uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;
|
||||
|
||||
if (gfx_level >= GFX9) {
|
||||
command |= S_415_BYTE_COUNT_GFX9(aligned_size) | S_415_DISABLE_WR_CONFIRM_GFX9(1);
|
||||
header |= S_411_DST_SEL(V_411_NOWHERE);
|
||||
command |= S_506_BYTE_COUNT(aligned_size) | S_506_DISABLE_WR_CONFIRM(1);
|
||||
header |= S_501_DST_SEL(V_501_DST_NOWHERE);
|
||||
} else {
|
||||
command |= S_415_BYTE_COUNT_GFX6(aligned_size) | S_415_DISABLE_WR_CONFIRM_GFX6(1);
|
||||
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
|
||||
command |= S_415_BYTE_COUNT(aligned_size) | S_415_DISABLE_WR_CONFIRM(1);
|
||||
header |= S_501_DST_SEL(V_501_DST_ADDR_USING_L2);
|
||||
}
|
||||
|
||||
header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
|
||||
header |= S_501_SRC_SEL(V_501_SRC_ADDR_USING_L2);
|
||||
|
||||
radeon_begin(cs);
|
||||
radeon_emit(PKT3(PKT3_DMA_DATA, 5, predicating));
|
||||
|
|
|
|||
|
|
@ -45,43 +45,43 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
|
|||
assert(!(flush_bits & (RADV_CMD_FLAG_VGT_STREAMOUT_SYNC)));
|
||||
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
|
||||
gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
|
||||
gcr_cntl |= S_587_GLI_INV(V_587_GLI_ALL);
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
|
||||
}
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
|
||||
gcr_cntl |= S_586_GLK_INV(1);
|
||||
gcr_cntl |= S_587_GLK_INV(1);
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
|
||||
}
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
|
||||
gcr_cntl |= S_586_GLV_INV(1);
|
||||
gcr_cntl |= S_587_GLV_INV(1);
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0;
|
||||
}
|
||||
if (flush_bits & (RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE) && gfx_level < GFX12) {
|
||||
gcr_cntl |= S_586_GL1_INV(1);
|
||||
gcr_cntl |= S_587_GL1_INV(1);
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_L1;
|
||||
}
|
||||
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_L2) {
|
||||
/* Writeback and invalidate everything in L2. */
|
||||
gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1);
|
||||
gcr_cntl |= S_587_GL2_INV(1) | S_587_GL2_WB(1);
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
|
||||
} else if (flush_bits & RADV_CMD_FLAG_WB_L2) {
|
||||
/* Writeback but do not invalidate.
|
||||
* GLM doesn't support WB alone. If WB is set, INV must be set too.
|
||||
*/
|
||||
gcr_cntl |= S_586_GL2_WB(1);
|
||||
gcr_cntl |= S_587_GL2_WB(1);
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2;
|
||||
}
|
||||
|
||||
if (gfx_level < GFX12 &&
|
||||
(flush_bits & (RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_INV_L2_METADATA))) {
|
||||
gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1);
|
||||
gcr_cntl |= S_587_GLM_INV(1) | S_587_GLM_WB(1);
|
||||
}
|
||||
|
||||
if (flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
|
||||
|
|
@ -107,7 +107,7 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
|
|||
}
|
||||
|
||||
/* First flush CB/DB, then L1/L2. */
|
||||
gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD);
|
||||
gcr_cntl |= S_587_SEQ(V_587_SEQ_FORWARD);
|
||||
|
||||
if ((flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) ==
|
||||
(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
|
||||
|
|
@ -153,13 +153,13 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
|
|||
/* Send an event that flushes caches. */
|
||||
ac_emit_cp_release_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, gcr_cntl);
|
||||
|
||||
gcr_cntl &= C_586_GLK_WB & C_586_GLK_INV & C_586_GLV_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
|
||||
gcr_cntl &= C_587_GLK_WB & C_587_GLK_INV & C_587_GLV_INV & C_587_GL2_INV & C_587_GL2_WB; /* keep SEQ */
|
||||
|
||||
if (gfx_level < GFX12)
|
||||
gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GL1_INV;
|
||||
gcr_cntl &= C_587_GLM_WB & C_587_GLM_INV & C_587_GL1_INV;
|
||||
|
||||
/* Wait for the event and invalidate remaining caches if needed. */
|
||||
ac_emit_cp_acquire_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, V_580_CP_PFP, 0, gcr_cntl);
|
||||
ac_emit_cp_acquire_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, V_581B_CP_PFP, 0, gcr_cntl);
|
||||
|
||||
gcr_cntl = 0; /* all done */
|
||||
} else {
|
||||
|
|
@ -172,27 +172,27 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
|
|||
* implied).
|
||||
*/
|
||||
/* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */
|
||||
unsigned glm_wb = G_586_GLM_WB(gcr_cntl);
|
||||
unsigned glm_inv = G_586_GLM_INV(gcr_cntl);
|
||||
unsigned glv_inv = G_586_GLV_INV(gcr_cntl);
|
||||
unsigned gl1_inv = G_586_GL1_INV(gcr_cntl);
|
||||
assert(G_586_GL2_US(gcr_cntl) == 0);
|
||||
assert(G_586_GL2_RANGE(gcr_cntl) == 0);
|
||||
assert(G_586_GL2_DISCARD(gcr_cntl) == 0);
|
||||
unsigned gl2_inv = G_586_GL2_INV(gcr_cntl);
|
||||
unsigned gl2_wb = G_586_GL2_WB(gcr_cntl);
|
||||
unsigned gcr_seq = G_586_SEQ(gcr_cntl);
|
||||
unsigned glm_wb = G_587_GLM_WB(gcr_cntl);
|
||||
unsigned glm_inv = G_587_GLM_INV(gcr_cntl);
|
||||
unsigned glv_inv = G_587_GLV_INV(gcr_cntl);
|
||||
unsigned gl1_inv = G_587_GL1_INV(gcr_cntl);
|
||||
assert(G_587_GL2_US(gcr_cntl) == 0);
|
||||
assert(G_587_GL2_RANGE(gcr_cntl) == 0);
|
||||
assert(G_587_GL2_DISCARD(gcr_cntl) == 0);
|
||||
unsigned gl2_inv = G_587_GL2_INV(gcr_cntl);
|
||||
unsigned gl2_wb = G_587_GL2_WB(gcr_cntl);
|
||||
unsigned gcr_seq = G_587_SEQ(gcr_cntl);
|
||||
|
||||
gcr_cntl &=
|
||||
C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
|
||||
C_587_GLM_WB & C_587_GLM_INV & C_587_GLV_INV & C_587_GL1_INV & C_587_GL2_INV & C_587_GL2_WB; /* keep SEQ */
|
||||
|
||||
assert(flush_cnt);
|
||||
(*flush_cnt)++;
|
||||
|
||||
radv_cs_emit_write_event_eop(
|
||||
cs, gfx_level, cb_db_event,
|
||||
S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | S_490_GL1_INV(gl1_inv) |
|
||||
S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | S_490_SEQ(gcr_seq),
|
||||
S_491_GLM_WB(glm_wb) | S_491_GLM_INV(glm_inv) | S_491_GLV_INV(glv_inv) | S_491_GL1_INV(gl1_inv) |
|
||||
S_491_GL2_INV(gl2_inv) | S_491_GL2_WB(gl2_wb) | S_491_SEQ(gcr_seq),
|
||||
EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, 0);
|
||||
|
||||
radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
|
||||
|
|
@ -207,8 +207,8 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev
|
|||
}
|
||||
|
||||
/* Ignore fields that only modify the behavior of other fields. */
|
||||
if (gcr_cntl & C_586_GL2_RANGE & C_586_SEQ & (gfx_level >= GFX12 ? ~0 : C_586_GL1_RANGE)) {
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, gcr_cntl);
|
||||
if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) {
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, gcr_cntl);
|
||||
} else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
|
||||
RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&
|
||||
!is_mec) {
|
||||
|
|
@ -392,7 +392,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
|
|||
}
|
||||
|
||||
if ((flush_bits & RADV_CMD_FLAG_INV_L2) || (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP,
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP,
|
||||
cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
|
||||
S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8));
|
||||
cp_coher_cntl = 0;
|
||||
|
|
@ -406,14 +406,15 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
|
|||
*
|
||||
* WB doesn't work without NC.
|
||||
*/
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP,
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP,
|
||||
cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
|
||||
}
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP,
|
||||
cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
|
||||
cp_coher_cntl = 0;
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0;
|
||||
|
|
@ -424,7 +425,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e
|
|||
* Therefore, it should be last. Done in PFP.
|
||||
*/
|
||||
if (cp_coher_cntl)
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, cp_coher_cntl);
|
||||
ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, cp_coher_cntl);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
|
|
|
|||
|
|
@ -194,7 +194,7 @@ radv_cs_write_data_head(const struct radv_device *device, struct radv_cmd_stream
|
|||
const unsigned cdw_end = radeon_check_space(device->ws, cs->b, 4 + count);
|
||||
|
||||
if (cs->hw_ip == AMD_IP_COMPUTE || cs->hw_ip == AMD_IP_GFX) {
|
||||
ac_emit_cp_write_data_head(cs->b, engine_sel, V_370_MEM, va, count, predicating);
|
||||
ac_emit_cp_write_data_head(cs->b, engine_sel, V_371_MEMORY, va, count, predicating);
|
||||
} else if (cs->hw_ip == AMD_IP_SDMA) {
|
||||
ac_emit_sdma_write_data_head(cs->b, va, count);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1017,7 +1017,7 @@ dgc_emit_indirect_buffer(struct dgc_cmdbuf *cs, nir_def *va, nir_def *ib_offset,
|
|||
nir_imm_int(b, PKT3(PKT3_INDIRECT_BUFFER, 2, 0)),
|
||||
nir_iadd(b, load_param32(b, upload_addr), ib_offset),
|
||||
nir_imm_int(b, pdev->info.address32_hi),
|
||||
nir_ior_imm(b, ib_cdw, S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(false)),
|
||||
nir_ior_imm(b, ib_cdw, S_3F3_CHAIN(1) | S_3F3_VALID(1) | S_3F3_PRE_ENA(false)),
|
||||
};
|
||||
|
||||
nir_store_global(b, nir_vec(b, packet, 4), va, .access = ACCESS_NON_READABLE);
|
||||
|
|
@ -1277,7 +1277,7 @@ dgc_gfx12_emit_hiz_wa(struct dgc_cmdbuf *cs)
|
|||
if (pdev->gfx12_hiz_wa == RADV_GFX12_HIZ_WA_PARTIAL) {
|
||||
dgc_cs_begin(cs);
|
||||
dgc_cs_emit_imm(PKT3(PKT3_RELEASE_MEM, 6, 0));
|
||||
dgc_cs_emit_imm(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5));
|
||||
dgc_cs_emit_imm(S_491_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_491_EVENT_INDEX(5));
|
||||
dgc_cs_emit_imm(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */
|
||||
dgc_cs_emit_imm(0); /* ADDRESS_LO */
|
||||
dgc_cs_emit_imm(0); /* ADDRESS_HI */
|
||||
|
|
|
|||
|
|
@ -602,7 +602,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query
|
|||
if (end) {
|
||||
uint64_t signal_va = va + pool->b.stride - 8 - 8 * pass;
|
||||
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, signal_va, 1);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, signal_va, 1);
|
||||
}
|
||||
|
||||
*skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1;
|
||||
|
|
@ -630,7 +630,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
|
|||
radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo);
|
||||
|
||||
uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, perf_ctr_va, 0);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, perf_ctr_va, 0);
|
||||
|
||||
radv_pc_wait_idle(cmd_buffer);
|
||||
radv_perfcounter_emit_reset(cs);
|
||||
|
|
|
|||
|
|
@ -671,7 +671,7 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q
|
|||
radeon_check_space(device->ws, ace_cs->b, 11);
|
||||
|
||||
gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
|
||||
ac_emit_cp_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(ace_cs->b, V_371_MICRO_ENGINE, va + task_invoc_offset + 4, 0x80000000);
|
||||
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
|
|
@ -745,7 +745,7 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que
|
|||
radeon_check_space(device->ws, ace_cs->b, 11);
|
||||
|
||||
gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset);
|
||||
ac_emit_cp_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(ace_cs->b, V_371_MICRO_ENGINE, va + task_invoc_offset + 4, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_pipeline_ace_queries--;
|
||||
|
||||
|
|
@ -970,11 +970,11 @@ radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t i
|
|||
if (pdev->use_ngg_streamout) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 4, 0x80000000);
|
||||
|
||||
/* written prim counter */
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 8);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 12, 0x80000000);
|
||||
|
||||
if (!cmd_buffer->state.active_emulated_prims_xfb_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
|
@ -999,11 +999,11 @@ radv_end_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t ind
|
|||
if (pdev->use_ngg_streamout) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 20, 0x80000000);
|
||||
|
||||
/* written prim counter */
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 24);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 28, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 28, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_emulated_prims_xfb_queries--;
|
||||
|
||||
|
|
@ -1339,7 +1339,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *
|
|||
if (pdev->info.gfx_level >= GFX11) {
|
||||
/* On GFX11+, primitives generated query are always emulated. */
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 4, 0x80000000);
|
||||
|
||||
if (!cmd_buffer->state.active_emulated_prims_gen_queries)
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY;
|
||||
|
|
@ -1363,7 +1363,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *
|
|||
if (pool->uses_emulated_queries) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 32);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 36, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 36, 0x80000000);
|
||||
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
|
|
@ -1388,7 +1388,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po
|
|||
if (pdev->info.gfx_level >= GFX11) {
|
||||
/* On GFX11+, primitives generated query are always emulated. */
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 20, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_emulated_prims_gen_queries--;
|
||||
|
||||
|
|
@ -1412,7 +1412,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po
|
|||
if (pool->uses_emulated_queries) {
|
||||
/* generated prim counter */
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 40);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 44, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 44, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_emulated_prims_gen_queries--;
|
||||
|
||||
|
|
@ -1590,7 +1590,7 @@ radv_begin_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
|
|||
radv_emit_event_write(&pdev->info, cs, RADV_EVENT_WRITE_PIPELINE_STAT, va);
|
||||
} else {
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 4, 0x80000000);
|
||||
|
||||
/* Record that the command buffer needs GDS. */
|
||||
cmd_buffer->gds_needed = true;
|
||||
|
|
@ -1627,7 +1627,7 @@ radv_end_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t
|
|||
cmd_buffer->gfx9_eop_bug_va);
|
||||
} else {
|
||||
gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000);
|
||||
ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 12, 0x80000000);
|
||||
|
||||
cmd_buffer->state.active_emulated_prims_gen_queries--;
|
||||
|
||||
|
|
|
|||
|
|
@ -615,7 +615,7 @@ radv_emit_ge_rings(struct radv_device *device, struct radv_cmd_stream *cs, struc
|
|||
ac_emit_cp_release_mem_pws(cs->b, pdev->info.gfx_level, AMD_IP_GFX, V_028A90_BOTTOM_OF_PIPE_TS, 0);
|
||||
|
||||
/* Wait for the PWS counter. */
|
||||
ac_emit_cp_acquire_mem_pws(cs->b, pdev->info.gfx_level, AMD_IP_GFX, V_028A90_BOTTOM_OF_PIPE_TS, V_580_CP_ME, 0, 0);
|
||||
ac_emit_cp_acquire_mem_pws(cs->b, pdev->info.gfx_level, AMD_IP_GFX, V_028A90_BOTTOM_OF_PIPE_TS, V_581B_CP_ME, 0, 0);
|
||||
|
||||
ac_emit_cp_gfx11_ge_rings(cs->b, &pdev->info, va, pdev->gfx12_hiz_wa == RADV_GFX12_HIZ_WA_PARTIAL);
|
||||
}
|
||||
|
|
@ -1415,8 +1415,8 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
|
|||
* meant to be executed on multiple compute engines at the same time.
|
||||
*/
|
||||
radv_cp_wait_mem(ace_pre_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff);
|
||||
radv_cs_write_data(device, ace_pre_cs, V_370_ME, ace_wait_va, 1, &zero, false);
|
||||
radv_cs_write_data(device, leader_pre_cs, V_370_ME, ace_wait_va, 1, &one, false);
|
||||
radv_cs_write_data(device, ace_pre_cs, V_371_MICRO_ENGINE, ace_wait_va, 1, &zero, false);
|
||||
radv_cs_write_data(device, leader_pre_cs, V_371_MICRO_ENGINE, ace_wait_va, 1, &one, false);
|
||||
/* Create postambles for gang submission.
|
||||
* This ensures that the gang leader waits for the whole gang,
|
||||
* which is necessary because the kernel signals the userspace fence
|
||||
|
|
@ -1424,7 +1424,7 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
|
|||
* same command buffers could be submitted again while still being executed.
|
||||
*/
|
||||
radv_cp_wait_mem(leader_post_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff);
|
||||
radv_cs_write_data(device, leader_post_cs, V_370_ME, leader_wait_va, 1, &zero, false);
|
||||
radv_cs_write_data(device, leader_post_cs, V_371_MICRO_ENGINE, leader_wait_va, 1, &zero, false);
|
||||
radv_cs_emit_write_event_eop(ace_post_cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
|
||||
EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, leader_wait_va, 1, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -411,7 +411,7 @@ radv_amdgpu_cs_grow(struct ac_cmdbuf *_cs, size_t min_size)
|
|||
|
||||
uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);
|
||||
|
||||
ib_size = align(MIN2(ib_size, ~C_3F2_IB_SIZE), ib_alignment);
|
||||
ib_size = align(MIN2(ib_size, ~C_3F3_IB_SIZE), ib_alignment);
|
||||
|
||||
VkResult result = radv_amdgpu_cs_bo_create(cs, ib_size);
|
||||
|
||||
|
|
@ -437,7 +437,7 @@ radv_amdgpu_cs_grow(struct ac_cmdbuf *_cs, size_t min_size)
|
|||
cs->base.buf[cs->base.cdw - 4] = PKT3(PKT3_INDIRECT_BUFFER, 2, 0);
|
||||
cs->base.buf[cs->base.cdw - 3] = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
|
||||
cs->base.buf[cs->base.cdw - 2] = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va >> 32;
|
||||
cs->base.buf[cs->base.cdw - 1] = S_3F2_CHAIN(1) | S_3F2_VALID(1);
|
||||
cs->base.buf[cs->base.cdw - 1] = S_3F3_CHAIN(1) | S_3F3_VALID(1);
|
||||
|
||||
cs->ib_size_ptr = cs->base.buf + cs->base.cdw - 1;
|
||||
}
|
||||
|
|
@ -490,7 +490,7 @@ radv_amdgpu_cs_finalize(struct ac_cmdbuf *_cs)
|
|||
/* Emit 4 dwords of NOP, these will be replaced by the chaining INDIRECT_BUFFER. */
|
||||
radv_amdgpu_cs_emit_nops(cs, 4);
|
||||
|
||||
assert(cs->base.cdw <= ~C_3F2_IB_SIZE);
|
||||
assert(cs->base.cdw <= ~C_3F3_IB_SIZE);
|
||||
*cs->ib_size_ptr |= cs->base.cdw;
|
||||
} else {
|
||||
radv_amdgpu_winsys_cs_pad(_cs, 0);
|
||||
|
|
@ -498,7 +498,7 @@ radv_amdgpu_cs_finalize(struct ac_cmdbuf *_cs)
|
|||
|
||||
/* Append the current (last) IB to the array of IB buffers. */
|
||||
radv_amdgpu_cs_add_ib_buffer(cs, cs->ib_buffer, cs->ib_buffer->va,
|
||||
cs->chain_ib ? G_3F2_IB_SIZE(*cs->ib_size_ptr) : cs->base.cdw);
|
||||
cs->chain_ib ? G_3F3_IB_SIZE(*cs->ib_size_ptr) : cs->base.cdw);
|
||||
|
||||
/* Prevent freeing this BO twice. */
|
||||
cs->ib_buffer = NULL;
|
||||
|
|
@ -587,7 +587,7 @@ radv_amdgpu_cs_chain(struct ac_cmdbuf *cs, struct ac_cmdbuf *next_cs, bool pre_e
|
|||
cs->buf[cs->cdw - 4] = PKT3(PKT3_INDIRECT_BUFFER, 2, 0);
|
||||
cs->buf[cs->cdw - 3] = next_acs->ib.ib_mc_address;
|
||||
cs->buf[cs->cdw - 2] = next_acs->ib.ib_mc_address >> 32;
|
||||
cs->buf[cs->cdw - 1] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(pre_ena) | next_acs->ib.size;
|
||||
cs->buf[cs->cdw - 1] = S_3F3_CHAIN(1) | S_3F3_VALID(1) | S_3F3_PRE_ENA(pre_ena) | next_acs->ib.size;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -752,7 +752,7 @@ radv_amdgpu_cs_execute_ib(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *bo, ui
|
|||
return;
|
||||
|
||||
assert(ib_va && ib_va % cs->ws->info.ip[cs->hw_ip].ib_alignment == 0);
|
||||
assert(cs->hw_ip == AMD_IP_GFX && cdw <= ~C_3F2_IB_SIZE);
|
||||
assert(cs->hw_ip == AMD_IP_GFX && cdw <= ~C_3F3_IB_SIZE);
|
||||
|
||||
ac_emit_cp_indirect_buffer(&cs->base, ib_va, cdw, 0, predicate);
|
||||
}
|
||||
|
|
@ -772,17 +772,18 @@ radv_amdgpu_cs_chain_dgc_ib(struct ac_cmdbuf *_cs, uint64_t va, uint32_t cdw, ui
|
|||
cs->ws->base.cs_execute_ib(_cs, NULL, va, cdw, predicate);
|
||||
} else {
|
||||
assert(va && va % cs->ws->info.ip[cs->hw_ip].ib_alignment == 0);
|
||||
assert(cdw <= ~C_3F2_IB_SIZE);
|
||||
assert(cdw <= ~C_3F3_IB_SIZE);
|
||||
|
||||
/* Emit a WRITE_DATA packet to patch the DGC CS. */
|
||||
const uint32_t chain_data[] = {
|
||||
PKT3(PKT3_INDIRECT_BUFFER, 2, 0),
|
||||
0,
|
||||
0,
|
||||
S_3F2_CHAIN(1) | S_3F2_VALID(1),
|
||||
S_3F3_CHAIN(1) | S_3F3_VALID(1),
|
||||
};
|
||||
|
||||
ac_emit_cp_write_data(&cs->base, V_370_ME, V_370_MEM, trailer_va, ARRAY_SIZE(chain_data), chain_data, false);
|
||||
ac_emit_cp_write_data(&cs->base, V_371_MICRO_ENGINE, V_371_MEMORY, trailer_va, ARRAY_SIZE(chain_data), chain_data,
|
||||
false);
|
||||
|
||||
/* Keep pointers for patching later. */
|
||||
uint64_t *ib_va_ptr = (uint64_t *)(cs->base.buf + cs->base.cdw - 3);
|
||||
|
|
@ -790,7 +791,7 @@ radv_amdgpu_cs_chain_dgc_ib(struct ac_cmdbuf *_cs, uint64_t va, uint32_t cdw, ui
|
|||
|
||||
/* Writeback L2 because CP isn't coherent with L2 on GFX6-8. */
|
||||
if (cs->ws->info.gfx_level == GFX8) {
|
||||
ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_580_CP_ME,
|
||||
ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_581B_CP_ME,
|
||||
S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
|
||||
}
|
||||
|
||||
|
|
@ -801,7 +802,7 @@ radv_amdgpu_cs_chain_dgc_ib(struct ac_cmdbuf *_cs, uint64_t va, uint32_t cdw, ui
|
|||
_cs->buf[_cs->cdw - 4] = PKT3(PKT3_INDIRECT_BUFFER, 2, 0);
|
||||
_cs->buf[_cs->cdw - 3] = va;
|
||||
_cs->buf[_cs->cdw - 2] = va >> 32;
|
||||
_cs->buf[_cs->cdw - 1] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | cdw;
|
||||
_cs->buf[_cs->cdw - 1] = S_3F3_CHAIN(1) | S_3F3_VALID(1) | cdw;
|
||||
|
||||
/* Allocate a new CS BO with initial size. */
|
||||
const uint64_t ib_size = radv_amdgpu_cs_get_initial_size(cs->ws, cs->hw_ip);
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ static struct si_resource *si_get_wait_mem_scratch_bo(struct si_context *ctx,
|
|||
PIPE_RESOURCE_FLAG_ENCRYPTED,
|
||||
PIPE_USAGE_DEFAULT, 4,
|
||||
sscreen->info.tcc_cache_line_size);
|
||||
si_cp_write_data(ctx, ctx->wait_mem_scratch_tmz, 0, 4, V_370_MEM, V_370_ME,
|
||||
si_cp_write_data(ctx, ctx->wait_mem_scratch_tmz, 0, 4, V_371_MEMORY, V_371_MICRO_ENGINE,
|
||||
&ctx->wait_mem_number);
|
||||
}
|
||||
|
||||
|
|
@ -159,13 +159,13 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
assert(ctx->gfx_level < GFX12 || !(flags & SI_BARRIER_INV_L2_METADATA));
|
||||
|
||||
if (flags & SI_BARRIER_INV_ICACHE)
|
||||
gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
|
||||
gcr_cntl |= S_587_GLI_INV(V_587_GLI_ALL);
|
||||
if (flags & SI_BARRIER_INV_SMEM)
|
||||
gcr_cntl |= S_586_GLK_INV(1);
|
||||
gcr_cntl |= S_587_GLK_INV(1);
|
||||
if (flags & SI_BARRIER_INV_VMEM)
|
||||
gcr_cntl |= S_586_GLV_INV(1);
|
||||
gcr_cntl |= S_587_GLV_INV(1);
|
||||
if (ctx->gfx_level < GFX12 && flags & (SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM))
|
||||
gcr_cntl |= S_586_GL1_INV(1);
|
||||
gcr_cntl |= S_587_GL1_INV(1);
|
||||
|
||||
/* The L2 cache ops are:
|
||||
* - INV: - invalidate lines that reflect memory (were loaded from memory)
|
||||
|
|
@ -178,14 +178,14 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
* GLM doesn't support WB alone. If WB is set, INV must be set too.
|
||||
*/
|
||||
if (flags & SI_BARRIER_INV_L2)
|
||||
gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1); /* Writeback and invalidate everything in L2. */
|
||||
gcr_cntl |= S_587_GL2_INV(1) | S_587_GL2_WB(1); /* Writeback and invalidate everything in L2. */
|
||||
else if (flags & SI_BARRIER_WB_L2)
|
||||
gcr_cntl |= S_586_GL2_WB(1);
|
||||
gcr_cntl |= S_587_GL2_WB(1);
|
||||
|
||||
/* Invalidate the metadata cache. */
|
||||
if (ctx->gfx_level < GFX12 &&
|
||||
flags & (SI_BARRIER_INV_L2 | SI_BARRIER_WB_L2 | SI_BARRIER_INV_L2_METADATA))
|
||||
gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1);
|
||||
gcr_cntl |= S_587_GLM_INV(1) | S_587_GLM_WB(1);
|
||||
|
||||
/* Flush CB/DB. Note that this also idles all shaders, including compute shaders. */
|
||||
if (flags & (SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB)) {
|
||||
|
|
@ -215,15 +215,15 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
radeon_end();
|
||||
|
||||
/* First flush CB/DB, then L1/L2. */
|
||||
gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD);
|
||||
gcr_cntl |= S_587_SEQ(V_587_SEQ_FORWARD);
|
||||
|
||||
if (ctx->gfx_level >= GFX11) {
|
||||
si_cp_release_mem_pws(ctx, cs, cb_db_event, gcr_cntl & C_586_GLI_INV);
|
||||
si_cp_release_mem_pws(ctx, cs, cb_db_event, gcr_cntl & C_587_GLI_INV);
|
||||
|
||||
/* Wait for the event and invalidate remaining caches if needed. */
|
||||
si_cp_acquire_mem_pws(ctx, cs, cb_db_event,
|
||||
flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME,
|
||||
gcr_cntl & ~C_586_GLI_INV, /* keep only GLI_INV */
|
||||
flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME,
|
||||
gcr_cntl & ~C_587_GLI_INV, /* keep only GLI_INV */
|
||||
0, flags);
|
||||
|
||||
gcr_cntl = 0; /* all done */
|
||||
|
|
@ -241,28 +241,28 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
ctx->wait_mem_number++;
|
||||
|
||||
/* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */
|
||||
unsigned glm_wb = G_586_GLM_WB(gcr_cntl);
|
||||
unsigned glm_inv = G_586_GLM_INV(gcr_cntl);
|
||||
unsigned glv_inv = G_586_GLV_INV(gcr_cntl);
|
||||
unsigned gl1_inv = G_586_GL1_INV(gcr_cntl);
|
||||
assert(G_586_GL2_US(gcr_cntl) == 0);
|
||||
assert(G_586_GL2_RANGE(gcr_cntl) == 0);
|
||||
assert(G_586_GL2_DISCARD(gcr_cntl) == 0);
|
||||
unsigned gl2_inv = G_586_GL2_INV(gcr_cntl);
|
||||
unsigned gl2_wb = G_586_GL2_WB(gcr_cntl);
|
||||
unsigned gcr_seq = G_586_SEQ(gcr_cntl);
|
||||
unsigned glm_wb = G_587_GLM_WB(gcr_cntl);
|
||||
unsigned glm_inv = G_587_GLM_INV(gcr_cntl);
|
||||
unsigned glv_inv = G_587_GLV_INV(gcr_cntl);
|
||||
unsigned gl1_inv = G_587_GL1_INV(gcr_cntl);
|
||||
assert(G_587_GL2_US(gcr_cntl) == 0);
|
||||
assert(G_587_GL2_RANGE(gcr_cntl) == 0);
|
||||
assert(G_587_GL2_DISCARD(gcr_cntl) == 0);
|
||||
unsigned gl2_inv = G_587_GL2_INV(gcr_cntl);
|
||||
unsigned gl2_wb = G_587_GL2_WB(gcr_cntl);
|
||||
unsigned gcr_seq = G_587_SEQ(gcr_cntl);
|
||||
|
||||
gcr_cntl &= C_586_GLV_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */
|
||||
gcr_cntl &= C_587_GLV_INV & C_587_GL2_INV & C_587_GL2_WB; /* keep SEQ */
|
||||
|
||||
if (ctx->gfx_level < GFX12)
|
||||
gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GL1_INV;
|
||||
gcr_cntl &= C_587_GLM_WB & C_587_GLM_INV & C_587_GL1_INV;
|
||||
|
||||
si_cp_release_mem(ctx, cs, cb_db_event,
|
||||
(ctx->gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) |
|
||||
S_490_GL1_INV(gl1_inv)) |
|
||||
S_490_GLV_INV(glv_inv) |
|
||||
S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
|
||||
S_490_SEQ(gcr_seq),
|
||||
(ctx->gfx_level >= GFX12 ? 0 : S_491_GLM_WB(glm_wb) | S_491_GLM_INV(glm_inv) |
|
||||
S_491_GL1_INV(gl1_inv)) |
|
||||
S_491_GLV_INV(glv_inv) |
|
||||
S_491_GL2_INV(gl2_inv) | S_491_GL2_WB(gl2_wb) |
|
||||
S_491_SEQ(gcr_seq),
|
||||
EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
|
||||
EOP_DATA_SEL_VALUE_32BIT, wait_mem_scratch, va, ctx->wait_mem_number,
|
||||
SI_NOT_QUERY);
|
||||
|
|
@ -294,9 +294,9 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs)
|
|||
}
|
||||
|
||||
/* Ignore fields that only modify the behavior of other fields. */
|
||||
if (gcr_cntl & C_586_GL2_RANGE & C_586_SEQ & (ctx->gfx_level >= GFX12 ? ~0 : C_586_GL1_RANGE)) {
|
||||
if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (ctx->gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) {
|
||||
si_cp_acquire_mem(ctx, cs, gcr_cntl,
|
||||
flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME);
|
||||
flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME);
|
||||
} else if (flags & SI_BARRIER_PFP_SYNC_ME) {
|
||||
si_cp_pfp_sync_me(cs);
|
||||
}
|
||||
|
|
@ -457,7 +457,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
*
|
||||
* GFX6-GFX7 don't support L2 write-back.
|
||||
*/
|
||||
unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME;
|
||||
unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME;
|
||||
|
||||
if (flags & SI_BARRIER_INV_L2 || (sctx->gfx_level <= GFX7 && flags & SI_BARRIER_WB_L2)) {
|
||||
/* Invalidate L1 & L2. WB must be set on GFX8+ when TC_ACTION is set. */
|
||||
|
|
@ -485,7 +485,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
|||
S_0301F0_TC_NC_ACTION_ENA(1),
|
||||
/* If this is not the last ACQUIRE_MEM, flush in ME.
|
||||
* We only want to synchronize with PFP in the last ACQUIRE_MEM. */
|
||||
last_acquire_mem ? engine : V_580_CP_ME);
|
||||
last_acquire_mem ? engine : V_581B_CP_ME);
|
||||
|
||||
if (last_acquire_mem)
|
||||
flags &= ~SI_BARRIER_PFP_SYNC_ME;
|
||||
|
|
|
|||
|
|
@ -397,7 +397,7 @@
|
|||
|
||||
#define radeon_emit_alt_hiz_packets() do { \
|
||||
radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); \
|
||||
radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5)); \
|
||||
radeon_emit(S_491_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_491_EVENT_INDEX(5)); \
|
||||
radeon_emit(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */ \
|
||||
radeon_emit(0); /* ADDRESS_LO */ \
|
||||
radeon_emit(0); /* ADDRESS_HI */ \
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
|
|||
{
|
||||
unsigned max =
|
||||
sctx->gfx_level >= GFX11 ? 32767 :
|
||||
sctx->gfx_level >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX6(~0u);
|
||||
sctx->gfx_level >= GFX9 ? S_506_BYTE_COUNT(~0u) : S_415_BYTE_COUNT(~0u);
|
||||
|
||||
/* make it aligned for optimal performance */
|
||||
return max & ~(SI_CPDMA_ALIGNMENT - 1);
|
||||
|
|
@ -49,26 +49,26 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
|
|||
}
|
||||
|
||||
if (sctx->gfx_level >= GFX9)
|
||||
command |= S_415_BYTE_COUNT_GFX9(size);
|
||||
command |= S_506_BYTE_COUNT(size);
|
||||
else
|
||||
command |= S_415_BYTE_COUNT_GFX6(size);
|
||||
command |= S_415_BYTE_COUNT(size);
|
||||
|
||||
/* Sync flags. */
|
||||
if (flags & CP_DMA_SYNC)
|
||||
header |= S_411_CP_SYNC(1);
|
||||
header |= S_501_CP_SYNC(1);
|
||||
|
||||
if (flags & CP_DMA_RAW_WAIT)
|
||||
command |= S_415_RAW_WAIT(1);
|
||||
command |= S_506_RAW_WAIT(1);
|
||||
|
||||
/* Src and dst flags. */
|
||||
/* GFX12: TC_L2 means MALL, which should always be set. */
|
||||
if (sctx->screen->info.cp_dma_use_L2 || sctx->gfx_level == GFX12)
|
||||
header |= S_501_DST_SEL(V_501_DST_ADDR_TC_L2);
|
||||
header |= S_501_DST_SEL(V_501_DST_ADDR_USING_L2);
|
||||
|
||||
if (flags & CP_DMA_CLEAR) {
|
||||
header |= S_411_SRC_SEL(V_411_DATA);
|
||||
header |= S_501_SRC_SEL(V_501_DATA);
|
||||
} else if (sctx->screen->info.cp_dma_use_L2 || sctx->gfx_level == GFX12) {
|
||||
header |= S_501_SRC_SEL(V_501_SRC_ADDR_TC_L2);
|
||||
header |= S_501_SRC_SEL(V_501_SRC_ADDR_USING_L2);
|
||||
}
|
||||
|
||||
radeon_begin(cs);
|
||||
|
|
@ -82,7 +82,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui
|
|||
radeon_emit(dst_va >> 32); /* DST_ADDR_HI [31:0] */
|
||||
radeon_emit(command);
|
||||
} else {
|
||||
header |= S_411_SRC_ADDR_HI(src_va >> 32);
|
||||
header |= S_412_SRC_ADDR_HI(src_va >> 32);
|
||||
|
||||
radeon_emit(PKT3(PKT3_CP_DMA, 4, 0));
|
||||
radeon_emit(src_va); /* SRC_ADDR_LO [31:0] */
|
||||
|
|
@ -313,8 +313,8 @@ void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, unsigned
|
|||
assert(offset % 4 == 0);
|
||||
assert(size % 4 == 0);
|
||||
|
||||
if (sctx->gfx_level == GFX6 && dst_sel == V_370_MEM)
|
||||
dst_sel = V_370_MEM_GRBM;
|
||||
if (sctx->gfx_level == GFX6 && dst_sel == V_371_MEMORY)
|
||||
dst_sel = V_371_MEM_GRBM;
|
||||
|
||||
radeon_add_to_buffer_list(sctx, cs, buf, RADEON_USAGE_WRITE | RADEON_PRIO_CP_DMA);
|
||||
uint64_t va = buf->gpu_address + offset;
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign
|
|||
if (!compute_ib)
|
||||
sctx->context_roll = true;
|
||||
|
||||
if (engine == V_580_CP_PFP)
|
||||
if (engine == V_581B_CP_PFP)
|
||||
si_cp_pfp_sync_me(cs);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1819,8 +1819,8 @@ static void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc
|
|||
data = desc->list + desc_slot_offset;
|
||||
va = desc->gpu_address + desc_slot_offset * 4;
|
||||
|
||||
si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, num_dwords * 4, V_370_TC_L2,
|
||||
V_370_ME, data);
|
||||
si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, num_dwords * 4, V_371_TC_L2,
|
||||
V_371_MICRO_ENGINE, data);
|
||||
}
|
||||
|
||||
static void si_upload_bindless_descriptors(struct si_context *sctx)
|
||||
|
|
|
|||
|
|
@ -218,7 +218,7 @@ static void si_fine_fence_set(struct si_context *ctx, struct si_fine_fence *fine
|
|||
if (flags & PIPE_FLUSH_TOP_OF_PIPE) {
|
||||
uint32_t value = 0x80000000;
|
||||
|
||||
si_cp_write_data(ctx, fine->buf, fine->offset, 4, V_370_MEM, V_370_PFP, &value);
|
||||
si_cp_write_data(ctx, fine->buf, fine->offset, 4, V_371_MEMORY, V_371_PREFETCH_PARSER, &value);
|
||||
} else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
|
||||
uint64_t fence_va = fine->buf->gpu_address + fine->offset;
|
||||
|
||||
|
|
|
|||
|
|
@ -573,7 +573,7 @@ void si_trace_emit(struct si_context *sctx)
|
|||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||
uint32_t trace_id = ++sctx->current_saved_cs->trace_id;
|
||||
|
||||
si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, 0, 4, V_370_MEM, V_370_ME, &trace_id);
|
||||
si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, 0, 4, V_371_MEMORY, V_371_MICRO_ENGINE, &trace_id);
|
||||
|
||||
ac_emit_cp_nop(&cs->current, AC_ENCODE_TRACE_POINT(trace_id));
|
||||
|
||||
|
|
|
|||
|
|
@ -785,7 +785,7 @@ static void si_emit_task_wait_packets(struct si_context *sctx)
|
|||
if (sctx->task_wait_count == sctx->last_task_wait_count)
|
||||
return;
|
||||
|
||||
si_cp_write_data(sctx, sctx->task_wait_buf, 0, 4, V_370_MEM, V_370_ME,
|
||||
si_cp_write_data(sctx, sctx->task_wait_buf, 0, 4, V_371_MEMORY, V_371_MICRO_ENGINE,
|
||||
&sctx->task_wait_count);
|
||||
|
||||
si_cp_wait_mem(sctx, sctx->gfx_cs.gang_cs, sctx->task_wait_buf->gpu_address,
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ static void si_pc_wait_idle(struct si_context *sctx)
|
|||
radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
|
||||
radeon_end();
|
||||
|
||||
si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_580_CP_PFP);
|
||||
si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_581B_CP_PFP);
|
||||
}
|
||||
|
||||
static void si_pc_emit_instance(struct si_context *sctx, int se, int instance)
|
||||
|
|
|
|||
|
|
@ -858,7 +858,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
|
|||
goto fail;
|
||||
}
|
||||
|
||||
si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4, V_370_MEM, V_370_ME,
|
||||
si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4, V_371_MEMORY, V_371_MICRO_ENGINE,
|
||||
&sctx->wait_mem_number);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -844,7 +844,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h
|
|||
/* Clear the emulated counter end value. We don't clear start because it's unused. */
|
||||
va += si_query_pipestat_end_dw_offset(sctx->screen, query->index) * 4;
|
||||
|
||||
ac_emit_cp_write_data_imm(&cs->current, V_370_PFP, va, 0);
|
||||
ac_emit_cp_write_data_imm(&cs->current, V_371_PREFETCH_PARSER, va, 0);
|
||||
|
||||
sctx->num_pipeline_stat_emulated_queries++;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -643,18 +643,18 @@ static void si_cp_dma_prefetch_inline(struct radeon_cmdbuf *cs, uint64_t address
|
|||
*/
|
||||
assert(size % SI_CPDMA_ALIGNMENT == 0);
|
||||
assert(address % SI_CPDMA_ALIGNMENT == 0);
|
||||
assert(size < S_415_BYTE_COUNT_GFX6(~0u));
|
||||
assert(size < S_415_BYTE_COUNT(~0u));
|
||||
assert(address || size == 0);
|
||||
|
||||
uint32_t header = S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
|
||||
uint32_t command = S_415_BYTE_COUNT_GFX6(size);
|
||||
uint32_t header = S_501_SRC_SEL(V_501_SRC_ADDR_USING_L2);
|
||||
uint32_t command = S_415_BYTE_COUNT(size);
|
||||
|
||||
if (GFX_VERSION >= GFX9) {
|
||||
command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
|
||||
header |= S_411_DST_SEL(V_411_NOWHERE);
|
||||
command |= S_506_DISABLE_WR_CONFIRM(1);
|
||||
header |= S_501_DST_SEL(V_501_DST_NOWHERE);
|
||||
} else {
|
||||
command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
|
||||
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
|
||||
command |= S_415_DISABLE_WR_CONFIRM(1);
|
||||
header |= S_501_DST_SEL(V_501_DST_ADDR_USING_L2);
|
||||
}
|
||||
|
||||
radeon_begin(cs);
|
||||
|
|
|
|||
|
|
@ -4982,7 +4982,7 @@ static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index)
|
|||
* in memory.
|
||||
*/
|
||||
si_cp_release_acquire_mem_pws(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
||||
V_580_CP_ME, 0);
|
||||
V_581B_CP_ME, 0);
|
||||
|
||||
uint64_t attr_address = sctx->ws->cs_is_secure(&sctx->gfx_cs) ?
|
||||
sscreen->attribute_pos_prim_ring_tmz->gpu_address :
|
||||
|
|
|
|||
|
|
@ -810,11 +810,11 @@ static bool amdgpu_get_new_ib(struct amdgpu_winsys *aws,
|
|||
static void amdgpu_set_ib_size(struct radeon_cmdbuf *rcs, struct amdgpu_ib *ib)
|
||||
{
|
||||
if (ib->is_chained_ib) {
|
||||
*ib->ptr_ib_size = rcs->current.cdw | S_3F2_CHAIN(1) | S_3F2_VALID(1);
|
||||
*ib->ptr_ib_size = rcs->current.cdw | S_3F3_CHAIN(1) | S_3F3_VALID(1);
|
||||
|
||||
struct amdgpu_cs *acs = amdgpu_cs(rcs);
|
||||
if (!rcs->gang && acs->preamble_ib_bo)
|
||||
*ib->ptr_ib_size |= S_3F2_PRE_ENA(1);
|
||||
*ib->ptr_ib_size |= S_3F3_PRE_ENA(1);
|
||||
} else {
|
||||
*ib->ptr_ib_size = rcs->current.cdw;
|
||||
}
|
||||
|
|
@ -1567,10 +1567,10 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws,
|
|||
|
||||
/* add release mem for user fence */
|
||||
amdgpu_pkt_add_dw(PKT3(PKT3_RELEASE_MEM, 6, 0));
|
||||
amdgpu_pkt_add_dw(S_490_EVENT_TYPE(V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT) |
|
||||
S_490_EVENT_INDEX(5) |
|
||||
(aws->info.gfx_level >= GFX12 ? 0 : S_490_GLM_WB(1) | S_490_GLM_INV(1)) |
|
||||
S_490_GL2_WB(1) | S_490_SEQ(1) | S_490_CACHE_POLICY(3));
|
||||
amdgpu_pkt_add_dw(S_491_EVENT_TYPE(V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT) |
|
||||
S_491_EVENT_INDEX(5) |
|
||||
(aws->info.gfx_level >= GFX12 ? 0 : S_491_GLM_WB(1) | S_491_GLM_INV(1)) |
|
||||
S_491_GL2_WB(1) | S_491_SEQ(1) | S_491_CACHE_POLICY(3));
|
||||
amdgpu_pkt_add_dw(S_030358_DATA_SEL(2));
|
||||
amdgpu_pkt_add_dw(userq->user_fence_va);
|
||||
amdgpu_pkt_add_dw(userq->user_fence_va >> 32);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue