diff --git a/src/amd/common/ac_cmdbuf.c b/src/amd/common/ac_cmdbuf.c index 998afa1afe0..4194ba7eb04 100644 --- a/src/amd/common/ac_cmdbuf.c +++ b/src/amd/common/ac_cmdbuf.c @@ -1020,7 +1020,8 @@ ac_cmdbuf_flush_vgt_streamout(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 3, 0)); - ac_cmdbuf_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_ENGINE_SEL(V_370_ME)); + ac_cmdbuf_emit(S_371_DST_SEL(V_371_MEM_MAPPED_REGISTER) | + S_371_ENGINE_SEL(V_371_MICRO_ENGINE)); ac_cmdbuf_emit(R_0300FC_CP_STRMOUT_CNTL >> 2); ac_cmdbuf_emit(0); ac_cmdbuf_emit(0); diff --git a/src/amd/common/ac_cmdbuf_cp.c b/src/amd/common/ac_cmdbuf_cp.c index 208d5988738..91dea443376 100644 --- a/src/amd/common/ac_cmdbuf_cp.c +++ b/src/amd/common/ac_cmdbuf_cp.c @@ -21,9 +21,9 @@ ac_emit_cp_indirect_buffer(struct ac_cmdbuf *cs, uint64_t va, uint32_t cdw, uint32_t dword2_flags = 0; if (flags & AC_CP_INDIRECT_BUFFER_CHAIN) - dword2_flags |= S_3F2_CHAIN(1); + dword2_flags |= S_3F3_CHAIN(1); if (flags & AC_CP_INDIRECT_BUFFER_VALID) - dword2_flags |= S_3F2_VALID(1); + dword2_flags |= S_3F3_VALID(1); ac_cmdbuf_begin(cs); ac_cmdbuf_emit(PKT3(PKT3_INDIRECT_BUFFER, 2, predicate)); @@ -60,9 +60,9 @@ ac_emit_cp_write_data_head(struct ac_cmdbuf *cs, uint32_t engine_sel, { ac_cmdbuf_begin(cs); ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 2 + size, predicate)); - ac_cmdbuf_emit(S_370_DST_SEL(dst_sel) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(engine_sel)); + ac_cmdbuf_emit(S_371_DST_SEL(dst_sel) | + S_371_WR_CONFIRM(1) | + S_371_ENGINE_SEL(engine_sel)); ac_cmdbuf_emit(va); ac_cmdbuf_emit(va >> 32); ac_cmdbuf_end(); @@ -83,7 +83,7 @@ void ac_emit_cp_write_data_imm(struct ac_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t value) { - ac_emit_cp_write_data(cs, engine_sel, V_370_MEM, va, 1, &value, false); + ac_emit_cp_write_data(cs, engine_sel, V_371_MEMORY, va, 1, &value, false); } void @@ -155,23 +155,23 @@ ac_emit_cp_acquire_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx const bool ts = is_ts_event(event_type); const bool ps_done = event_type == V_028A90_PS_DONE; const bool cs_done = event_type == V_028A90_CS_DONE; - const uint32_t counter_sel = ts ? V_580_TS_SELECT : ps_done ? V_580_PS_SELECT : V_580_CS_SELECT; + const uint32_t counter_sel = ts ? V_581B_TS_SELECT : ps_done ? V_581B_PS_SELECT : V_581B_CS_SELECT; assert((int)ts + (int)cs_done + (int)ps_done == 1); - assert(!gcr_cntl || stage_sel == V_580_CP_PFP || stage_sel == V_580_CP_ME); - assert(stage_sel != V_580_PRE_COLOR); + assert(!gcr_cntl || stage_sel == V_581B_CP_PFP || stage_sel == V_581B_CP_ME); + assert(stage_sel != V_581B_PRE_COLOR); ac_cmdbuf_begin(cs); ac_cmdbuf_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0)); - ac_cmdbuf_emit(S_580_PWS_STAGE_SEL(stage_sel) | - S_580_PWS_COUNTER_SEL(counter_sel) | - S_580_PWS_ENA2(1) | - S_580_PWS_COUNT(count)); + ac_cmdbuf_emit(S_581B_PWS_STAGE_SEL(stage_sel) | + S_581B_PWS_COUNTER_SEL(counter_sel) | + S_581B_PWS_ENA2(1) | + S_581B_PWS_COUNT(count)); ac_cmdbuf_emit(0xffffffff); /* GCR_SIZE */ ac_cmdbuf_emit(0x01ffffff); /* GCR_SIZE_HI */ ac_cmdbuf_emit(0); /* GCR_BASE_LO */ ac_cmdbuf_emit(0); /* GCR_BASE_HI */ - ac_cmdbuf_emit(S_585_PWS_ENA(1)); + ac_cmdbuf_emit(S_586B_PWS_ENA(1)); ac_cmdbuf_emit(gcr_cntl); /* GCR_CNTL (this has no effect if PWS_STAGE_SEL isn't PFP or ME) */ ac_cmdbuf_end(); } @@ -196,34 +196,34 @@ ac_emit_cp_release_mem_pws(struct ac_cmdbuf *cs, ASSERTED enum amd_gfx_level gfx event_type != V_028A90_CS_DONE)); /* Extract GCR_CNTL fields because the encoding is different in RELEASE_MEM. */ - assert(G_586_GLI_INV(gcr_cntl) == 0); - assert(gfx_level >= GFX12 || G_586_GL1_RANGE(gcr_cntl) == 0); - const uint32_t glm_wb = G_586_GLM_WB(gcr_cntl); - const uint32_t glm_inv = G_586_GLM_INV(gcr_cntl); - const uint32_t glk_wb = G_586_GLK_WB(gcr_cntl); - const uint32_t glk_inv = G_586_GLK_INV(gcr_cntl); - const uint32_t glv_inv = G_586_GLV_INV(gcr_cntl); - const uint32_t gl1_inv = G_586_GL1_INV(gcr_cntl); - assert(G_586_GL2_US(gcr_cntl) == 0); - assert(G_586_GL2_RANGE(gcr_cntl) == 0); - assert(G_586_GL2_DISCARD(gcr_cntl) == 0); - const uint32_t gl2_inv = G_586_GL2_INV(gcr_cntl); - const uint32_t gl2_wb = G_586_GL2_WB(gcr_cntl); - const uint32_t gcr_seq = G_586_SEQ(gcr_cntl); + assert(G_587_GLI_INV(gcr_cntl) == 0); + assert(gfx_level >= GFX12 || G_587_GL1_RANGE(gcr_cntl) == 0); + const uint32_t glm_wb = G_587_GLM_WB(gcr_cntl); + const uint32_t glm_inv = G_587_GLM_INV(gcr_cntl); + const uint32_t glk_wb = G_587_GLK_WB(gcr_cntl); + const uint32_t glk_inv = G_587_GLK_INV(gcr_cntl); + const uint32_t glv_inv = G_587_GLV_INV(gcr_cntl); + const uint32_t gl1_inv = G_587_GL1_INV(gcr_cntl); + assert(G_587_GL2_US(gcr_cntl) == 0); + assert(G_587_GL2_RANGE(gcr_cntl) == 0); + assert(G_587_GL2_DISCARD(gcr_cntl) == 0); + const uint32_t gl2_inv = G_587_GL2_INV(gcr_cntl); + const uint32_t gl2_wb = G_587_GL2_WB(gcr_cntl); + const uint32_t gcr_seq = G_587_SEQ(gcr_cntl); const bool ts = is_ts_event(event_type); ac_cmdbuf_begin(cs); ac_cmdbuf_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); - ac_cmdbuf_emit(S_490_EVENT_TYPE(event_type) | - S_490_EVENT_INDEX(ts ? 5 : 6) | - (gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GL1_INV(gl1_inv)) | - S_490_GLV_INV(glv_inv) | - S_490_GL2_INV(gl2_inv) | - S_490_GL2_WB(gl2_wb) | - S_490_SEQ(gcr_seq) | - S_490_GLK_WB(glk_wb) | - S_490_GLK_INV(glk_inv) | - S_490_PWS_ENABLE(1)); + ac_cmdbuf_emit(S_491_EVENT_TYPE(event_type) | + S_491_EVENT_INDEX(ts ? 5 : 6) | + (gfx_level >= GFX12 ? 0 : S_491_GLM_WB(glm_wb) | S_491_GLM_INV(glm_inv) | S_491_GL1_INV(gl1_inv)) | + S_491_GLV_INV(glv_inv) | + S_491_GL2_INV(gl2_inv) | + S_491_GL2_WB(gl2_wb) | + S_491_SEQ(gcr_seq) | + S_491_GLK_WB(glk_wb) | + S_491_GLK_INV(glk_inv) | + S_491_PWS_ENABLE(1)); ac_cmdbuf_emit(0); /* DST_SEL, INT_SEL, DATA_SEL */ ac_cmdbuf_emit(0); /* ADDRESS_LO */ ac_cmdbuf_emit(0); /* ADDRESS_HI */ @@ -403,14 +403,14 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level, enum amd_ip_type ip_type, uint32_t engine, uint32_t gcr_cntl) { - assert(engine == V_580_CP_PFP || engine == V_580_CP_ME); + assert(engine == V_581B_CP_PFP || engine == V_581B_CP_ME); assert(gcr_cntl); ac_cmdbuf_begin(cs); if (gfx_level >= GFX10) { /* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */ - const uint32_t engine_flag = engine == V_580_CP_ME ? BITFIELD_BIT(31) : 0; + const uint32_t engine_flag = engine == V_581B_CP_ME ? BITFIELD_BIT(31) : 0; /* Flush caches. This doesn't wait for idle. */ ac_cmdbuf_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0)); diff --git a/src/amd/common/ac_gather_context_rolls.c b/src/amd/common/ac_gather_context_rolls.c index 778b38fbbf4..ae10cb85df9 100644 --- a/src/amd/common/ac_gather_context_rolls.c +++ b/src/amd/common/ac_gather_context_rolls.c @@ -253,7 +253,7 @@ static void ac_ib_gather_context_rolls(struct ac_context_roll_ctx *ctx, uint32_t break; case PKT3_ACQUIRE_MEM: - if (G_580_PWS_ENA2(ib[cur_dw])) { + if (G_581B_PWS_ENA2(ib[cur_dw])) { ac_record_wait_idle(ctx); } else { ac_roll_context(ctx); @@ -266,7 +266,7 @@ static void ac_ib_gather_context_rolls(struct ac_context_roll_ctx *ctx, uint32_t break; case PKT3_EVENT_WRITE: - if (G_490_EVENT_TYPE(ib[cur_dw]) == V_028A90_PS_PARTIAL_FLUSH) + if (G_491_EVENT_TYPE(ib[cur_dw]) == V_028A90_PS_PARTIAL_FLUSH) ac_record_wait_idle(ctx); break; diff --git a/src/amd/common/ac_parse_ib.c b/src/amd/common/ac_parse_ib.c index a878a0bcd3f..1396e76a3dd 100644 --- a/src/amd/common/ac_parse_ib.c +++ b/src/amd/common/ac_parse_ib.c @@ -463,11 +463,11 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, break; case PKT3_CP_DMA: /* GFX6 */ - ac_dump_reg(f, ib->gfx_level, ib->family, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_415_COMMAND, ac_ib_get(ib), ~0); + ac_dump_reg(f, ib->gfx_level, ib->family, R_411_CP_DMA_WORD0, ac_ib_get(ib), ~0); + ac_dump_reg(f, ib->gfx_level, ib->family, R_412_CP_DMA_WORD1, ac_ib_get(ib), ~0); + ac_dump_reg(f, ib->gfx_level, ib->family, R_413_CP_DMA_WORD2, ac_ib_get(ib), ~0); + ac_dump_reg(f, ib->gfx_level, ib->family, R_414_CP_DMA_WORD3, ac_ib_get(ib), ~0); + ac_dump_reg(f, ib->gfx_level, ib->family, R_415_CP_DMA_COMMAND, ac_ib_get(ib), ~0); break; case PKT3_DMA_DATA: { if (ib->gfx_level >= GFX9) { @@ -482,29 +482,29 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, uint64_t dst_addr = ac_ib_get64(ib); uint32_t command = ac_ib_get(ib); - uint32_t size = G_415_BYTE_COUNT_GFX6(command); + uint32_t size = G_415_BYTE_COUNT(command); uint32_t src_sel = G_501_SRC_SEL(header); bool src_mem = (src_sel == V_501_SRC_ADDR && G_415_SAS(command) == V_415_MEMORY) || - src_sel == V_411_SRC_ADDR_TC_L2; + src_sel == V_501_SRC_ADDR_USING_L2; uint32_t dst_sel = G_501_DST_SEL(header); bool dst_mem = (dst_sel == V_501_DST_ADDR && G_415_DAS(command) == V_415_MEMORY) || - dst_sel == V_411_DST_ADDR_TC_L2; + dst_sel == V_501_DST_ADDR_USING_L2; print_addr(ib, "SRC_ADDR", src_addr, src_mem ? size : AC_ADDR_SIZE_NOT_MEMORY); print_addr(ib, "DST_ADDR", dst_addr, dst_mem ? size : AC_ADDR_SIZE_NOT_MEMORY); - ac_dump_reg(f, ib->gfx_level, ib->family, R_415_COMMAND, command, ~0); + ac_dump_reg(f, ib->gfx_level, ib->family, R_415_CP_DMA_COMMAND, command, ~0); break; } case PKT3_INDIRECT_BUFFER_SI: case PKT3_INDIRECT_BUFFER: { uint32_t base_lo_dw = ac_ib_get(ib); - ac_dump_reg(f, ib->gfx_level, ib->family, R_3F0_IB_BASE_LO, base_lo_dw, ~0); + ac_dump_reg(f, ib->gfx_level, ib->family, R_3F1_IB_BASE_LO, base_lo_dw, ~0); uint32_t base_hi_dw = ac_ib_get(ib); - ac_dump_reg(f, ib->gfx_level, ib->family, R_3F1_IB_BASE_HI, base_hi_dw, ~0); + ac_dump_reg(f, ib->gfx_level, ib->family, R_3F2_IB_BASE_HI, base_hi_dw, ~0); uint32_t control_dw = ac_ib_get(ib); - ac_dump_reg(f, ib->gfx_level, ib->family, R_3F2_IB_CONTROL, control_dw, ~0); + ac_dump_reg(f, ib->gfx_level, ib->family, R_3F3_IB_CONTROL, control_dw, ~0); if (!ib->addr_callback) break; @@ -516,9 +516,9 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, if (!data) break; - if (G_3F2_CHAIN(control_dw)) { + if (G_3F3_CHAIN(control_dw)) { ib->ib = data; - ib->num_dw = G_3F2_IB_SIZE(control_dw); + ib->num_dw = G_3F3_IB_SIZE(control_dw); ib->cur_dw = 0; return; } @@ -526,7 +526,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, struct ac_ib_parser ib_recurse; memcpy(&ib_recurse, ib, sizeof(ib_recurse)); ib_recurse.ib = data; - ib_recurse.num_dw = G_3F2_IB_SIZE(control_dw); + ib_recurse.num_dw = G_3F3_IB_SIZE(control_dw); ib_recurse.cur_dw = 0; if (ib_recurse.trace_id_count) { if (*current_trace_id == *ib->trace_ids) { diff --git a/src/amd/common/ac_shadowed_regs.c b/src/amd/common/ac_shadowed_regs.c index e36ba9f1c7f..c94e5f6b6e5 100644 --- a/src/amd/common/ac_shadowed_regs.c +++ b/src/amd/common/ac_shadowed_regs.c @@ -3002,9 +3002,9 @@ struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *i * Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory. */ ac_pm4_cmd_add(pm4, PKT3(PKT3_RELEASE_MEM, 6, 0)); - ac_pm4_cmd_add(pm4, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | - S_490_EVENT_INDEX(5) | - S_490_PWS_ENABLE(1)); + ac_pm4_cmd_add(pm4, S_491_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | + S_491_EVENT_INDEX(5) | + S_491_PWS_ENABLE(1)); ac_pm4_cmd_add(pm4, 0); /* DST_SEL, INT_SEL, DATA_SEL */ ac_pm4_cmd_add(pm4, 0); /* ADDRESS_LO */ ac_pm4_cmd_add(pm4, 0); /* ADDRESS_HI */ @@ -3012,28 +3012,28 @@ struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *i ac_pm4_cmd_add(pm4, 0); /* DATA_HI */ ac_pm4_cmd_add(pm4, 0); /* INT_CTXID */ - unsigned gcr_cntl = S_586_GL2_INV(1) | S_586_GL2_WB(1) | - (info->gfx_level >= GFX12 ? 0 : S_586_GLM_INV(1) | S_586_GLM_WB(1) | S_586_GL1_INV(1)) | - S_586_GLV_INV(1) | - S_586_GLK_INV(1) | S_586_GLI_INV(V_586_GLI_ALL); + unsigned gcr_cntl = S_587_GL2_INV(1) | S_587_GL2_WB(1) | + (info->gfx_level >= GFX12 ? 0 : S_587_GLM_INV(1) | S_587_GLM_WB(1) | S_587_GL1_INV(1)) | + S_587_GLV_INV(1) | + S_587_GLK_INV(1) | S_587_GLI_INV(V_587_GLI_ALL); /* Wait for the PWS counter. */ ac_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0)); - ac_pm4_cmd_add(pm4, S_580_PWS_STAGE_SEL(V_580_CP_PFP) | - S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | - S_580_PWS_ENA2(1) | - S_580_PWS_COUNT(0)); + ac_pm4_cmd_add(pm4, S_581B_PWS_STAGE_SEL(V_581B_CP_PFP) | + S_581B_PWS_COUNTER_SEL(V_581B_TS_SELECT) | + S_581B_PWS_ENA2(1) | + S_581B_PWS_COUNT(0)); ac_pm4_cmd_add(pm4, 0xffffffff); /* GCR_SIZE */ ac_pm4_cmd_add(pm4, 0x01ffffff); /* GCR_SIZE_HI */ ac_pm4_cmd_add(pm4, 0); /* GCR_BASE_LO */ ac_pm4_cmd_add(pm4, 0); /* GCR_BASE_HI */ - ac_pm4_cmd_add(pm4, S_585_PWS_ENA(1)); + ac_pm4_cmd_add(pm4, S_586B_PWS_ENA(1)); ac_pm4_cmd_add(pm4, gcr_cntl); /* GCR_CNTL */ } else if (info->gfx_level >= GFX10) { - unsigned gcr_cntl = S_586_GL2_INV(1) | S_586_GL2_WB(1) | - S_586_GLM_INV(1) | S_586_GLM_WB(1) | - S_586_GL1_INV(1) | S_586_GLV_INV(1) | - S_586_GLK_INV(1) | S_586_GLI_INV(V_586_GLI_ALL); + unsigned gcr_cntl = S_587_GL2_INV(1) | S_587_GL2_WB(1) | + S_587_GLM_INV(1) | S_587_GLM_WB(1) | + S_587_GL1_INV(1) | S_587_GLV_INV(1) | + S_587_GLK_INV(1) | S_587_GLI_INV(V_587_GLI_ALL); ac_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0)); ac_pm4_cmd_add(pm4, 0); /* CP_COHER_CNTL */ diff --git a/src/amd/common/ac_spm.c b/src/amd/common/ac_spm.c index fa842d3558b..36095fedf21 100644 --- a/src/amd/common/ac_spm.c +++ b/src/amd/common/ac_spm.c @@ -1812,10 +1812,10 @@ ac_emit_spm_muxsel(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level, /* Write the muxsel line configuration with MUXSEL_DATA. */ ac_cmdbuf_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0)); - ac_cmdbuf_emit(S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME) | - S_370_WR_ONE_ADDR(1)); + ac_cmdbuf_emit(S_371_DST_SEL(V_371_MEM_MAPPED_REGISTER) | + S_371_WR_CONFIRM(V_371_WAIT_FOR_WRITE_CONFIRMATION) | + S_371_ENGINE_SEL(V_371_MICRO_ENGINE) | + S_371_ADDR_INCR(V_371_DO_NOT_INCREMENT_ADDRESS)); ac_cmdbuf_emit(rlc_muxsel_data >> 2); ac_cmdbuf_emit(0); ac_cmdbuf_emit_array(data, AC_SPM_MUXSEL_LINE_SIZE); diff --git a/src/amd/packets/parse_cp_pm4_table_data_json.py b/src/amd/packets/parse_cp_pm4_table_data_json.py index 8bff5ce1e93..842d26d54ce 100644 --- a/src/amd/packets/parse_cp_pm4_table_data_json.py +++ b/src/amd/packets/parse_cp_pm4_table_data_json.py @@ -57,7 +57,7 @@ packet_field_register_map = { # (name, first_bit): (register, mask) ('COHER_CNTL', 0): ('R_0301F0_CP_COHER_CNTL', ~0), ('EVENT_TYPE', 0): ('R_028A90_VGT_EVENT_INITIATOR', 0x3F), - ('GCR_CNTL', 0): ('R_586_GCR_CNTL', ~0), + ('GCR_CNTL', 0): ('R_587_GCR_CNTL', ~0), ('DISPATCH_INITIATOR', 0): ('R_00B800_COMPUTE_DISPATCH_INITIATOR', ~0), ('DRAW_INITIATOR', 0): ('R_0287F0_VGT_DRAW_INITIATOR', ~0), } diff --git a/src/amd/registers/pkt3.json b/src/amd/registers/pkt3.json index 1155afa5111..71298ec5836 100644 --- a/src/amd/registers/pkt3.json +++ b/src/amd/registers/pkt3.json @@ -1,18 +1,19 @@ { + "comment": "This file is only for definitions not present in the generated packet headers.", "enums": { - "COMMAND__SAIC": { + "CP_DMA_COMMAND__SAIC": { "entries": [ {"name": "INCREMENT", "value": 0}, {"name": "NO_INCREMENT", "value": 1} ] }, - "COMMAND__SAS": { + "CP_DMA_COMMAND__SAS": { "entries": [ {"name": "MEMORY", "value": 0}, {"name": "REGISTER", "value": 1} ] }, - "COMMAND__SRC_SWAP": { + "CP_DMA_COMMAND__SRC_SWAP": { "entries": [ {"name": "NONE", "value": 0}, {"name": "8_IN_16", "value": 1}, @@ -20,31 +21,9 @@ {"name": "8_IN_64", "value": 3} ] }, - "CONTROL__DST_SEL": { + "WRITE_DATA_CONTROL__DST_SEL": { "entries": [ - {"name": "MEM_MAPPED_REGISTER", "value": 0}, - {"comment": "sync across GRBM", "name": "MEM_GRBM", "value": 1}, - {"name": "TC_L2", "value": 2}, - {"name": "GDS", "value": 3}, - {"name": "RESERVED", "value": 4} - ] - }, - "CONTROL__DST_SEL_cik": { - "entries": [ - {"name": "MEM_MAPPED_REGISTER", "value": 0}, - {"comment": "sync across GRBM", "name": "MEM_GRBM", "value": 1}, - {"name": "TC_L2", "value": 2}, - {"name": "GDS", "value": 3}, - {"name": "RESERVED", "value": 4}, - {"name": "MEM", "value": 5} - ] - }, - "CONTROL__ENGINE_SEL": { - "entries": [ - {"name": "ME", "value": 0}, - {"name": "PFP", "value": 1}, - {"name": "CE", "value": 2}, - {"name": "DE", "value": 3} + {"comment": "sync across GRBM", "name": "MEM_GRBM", "value": 1} ] }, "CP_DMA_WORD1__DST_SEL": { @@ -60,14 +39,6 @@ {"name": "DST_ADDR_TC_L2", "value": 3} ] }, - "CP_DMA_WORD1__DST_SEL_gfx9": { - "entries": [ - {"name": "DST_ADDR", "value": 0}, - {"comment": "program DAS to 1 as well", "name": "GDS", "value": 1}, - {"name": "NOWHERE", "value": 2}, - {"name": "DST_ADDR_TC_L2", "value": 3} - ] - }, "CP_DMA_WORD1__ENGINE": { "entries": [ {"name": "ME", "value": 0}, @@ -118,152 +89,48 @@ {"name": "SEQ_FORWARD", "value": 1}, {"name": "SEQ_REVERSE", "value": 2} ] - }, - "PWS_STAGE_SEL": { - "entries": [ - {"name": "PRE_DEPTH", "value": 0}, - {"name": "PRE_SHADER", "value": 1}, - {"name": "PRE_COLOR", "value": 2}, - {"name": "PRE_PIX_SHADER", "value": 3}, - {"name": "CP_PFP", "value": 4}, - {"name": "CP_ME", "value": 5} - ] - }, - "PWS_COUNTER_SEL": { - "entries": [ - {"name": "TS_SELECT", "value": 0}, - {"name": "PS_SELECT", "value": 1}, - {"name": "CS_SELECT", "value": 2} - ] - }, - "VGT_EVENT_TYPE_gfx11": { - "entries": [ - {"name": "Reserved_0x00", "value": 0}, - {"name": "SAMPLE_STREAMOUTSTATS1", "value": 1}, - {"name": "SAMPLE_STREAMOUTSTATS2", "value": 2}, - {"name": "SAMPLE_STREAMOUTSTATS3", "value": 3}, - {"name": "CACHE_FLUSH_TS", "value": 4}, - {"name": "CONTEXT_DONE", "value": 5}, - {"name": "CACHE_FLUSH", "value": 6}, - {"name": "CS_PARTIAL_FLUSH", "value": 7}, - {"name": "VGT_STREAMOUT_SYNC", "value": 8}, - {"name": "Reserved_0x09", "value": 9}, - {"name": "VGT_STREAMOUT_RESET", "value": 10}, - {"name": "END_OF_PIPE_INCR_DE", "value": 11}, - {"name": "END_OF_PIPE_IB_END", "value": 12}, - {"name": "RST_PIX_CNT", "value": 13}, - {"name": "BREAK_BATCH", "value": 14}, - {"name": "VS_PARTIAL_FLUSH", "value": 15}, - {"name": "PS_PARTIAL_FLUSH", "value": 16}, - {"name": "FLUSH_HS_OUTPUT", "value": 17}, - {"name": "FLUSH_DFSM", "value": 18}, - {"name": "RESET_TO_LOWEST_VGT", "value": 19}, - {"name": "CACHE_FLUSH_AND_INV_TS_EVENT", "value": 20}, - {"name": "WAIT_SYNC", "value": 21}, - {"name": "CACHE_FLUSH_AND_INV_EVENT", "value": 22}, - {"name": "PERFCOUNTER_START", "value": 23}, - {"name": "PERFCOUNTER_STOP", "value": 24}, - {"name": "PIPELINESTAT_START", "value": 25}, - {"name": "PIPELINESTAT_STOP", "value": 26}, - {"name": "PERFCOUNTER_SAMPLE", "value": 27}, - {"name": "FLUSH_ES_OUTPUT", "value": 28}, - {"name": "BIN_CONF_OVERRIDE_CHECK", "value": 29}, - {"name": "SAMPLE_PIPELINESTAT", "value": 30}, - {"name": "SO_VGTSTREAMOUT_FLUSH", "value": 31}, - {"name": "SAMPLE_STREAMOUTSTATS", "value": 32}, - {"name": "RESET_VTX_CNT", "value": 33}, - {"name": "BLOCK_CONTEXT_DONE", "value": 34}, - {"name": "CS_CONTEXT_DONE", "value": 35}, - {"name": "VGT_FLUSH", "value": 36}, - {"name": "TGID_ROLLOVER", "value": 37}, - {"name": "SQ_NON_EVENT", "value": 38}, - {"name": "SC_SEND_DB_VPZ", "value": 39}, - {"name": "BOTTOM_OF_PIPE_TS", "value": 40}, - {"name": "FLUSH_SX_TS", "value": 41}, - {"name": "DB_CACHE_FLUSH_AND_INV", "value": 42}, - {"name": "FLUSH_AND_INV_DB_DATA_TS", "value": 43}, - {"name": "FLUSH_AND_INV_DB_META", "value": 44}, - {"name": "FLUSH_AND_INV_CB_DATA_TS", "value": 45}, - {"name": "FLUSH_AND_INV_CB_META", "value": 46}, - {"name": "CS_DONE", "value": 47}, - {"name": "PS_DONE", "value": 48}, - {"name": "FLUSH_AND_INV_CB_PIXEL_DATA", "value": 49}, - {"name": "SX_CB_RAT_ACK_REQUEST", "value": 50}, - {"name": "THREAD_TRACE_START", "value": 51}, - {"name": "THREAD_TRACE_STOP", "value": 52}, - {"name": "THREAD_TRACE_MARKER", "value": 53}, - {"name": "THREAD_TRACE_DRAW", "value": 54}, - {"name": "THREAD_TRACE_FINISH", "value": 55}, - {"name": "PIXEL_PIPE_STAT_CONTROL", "value": 56}, - {"name": "PIXEL_PIPE_STAT_DUMP", "value": 57}, - {"name": "PIXEL_PIPE_STAT_RESET", "value": 58}, - {"name": "CONTEXT_SUSPEND", "value": 59}, - {"name": "OFFCHIP_HS_DEALLOC", "value": 60}, - {"name": "ENABLE_NGG_PIPELINE", "value": 61}, - {"name": "ENABLE_LEGACY_PIPELINE", "value": 62}, - {"name": "DRAW_DONE", "value": 63} - ] } }, "register_mappings": [ { - "comment": "This is at offset 0x415 instead of 0x414 due to a conflict with SQ_WAVE_GPR_ALLOC", "chips": ["gfx6", "gfx7", "gfx8", "gfx81"], "map": {"at": 1045, "to": "pkt3"}, - "name": "COMMAND", - "type_ref": "COMMAND" - }, - { - "chips": ["gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1045, "to": "pkt3"}, - "name": "COMMAND", - "type_ref": "COMMAND_gfx9" + "name": "CP_DMA_COMMAND", + "type_ref": "CP_DMA_COMMAND" }, { "chips": ["gfx6"], - "map": {"at": 880, "to": "pkt3"}, - "name": "CONTROL", - "type_ref": "CONTROL" + "map": {"at": 881, "to": "pkt3"}, + "name": "WRITE_DATA_CONTROL", + "type_ref": "WRITE_DATA_CONTROL" }, { - "chips": ["gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 880, "to": "pkt3"}, - "name": "CONTROL", - "type_ref": "CONTROL_cik" - }, - { - "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1040, "to": "pkt3"}, + "chips": ["gfx6", "gfx7", "gfx8", "gfx81"], + "map": {"at": 1041, "to": "pkt3"}, "name": "CP_DMA_WORD0", "type_ref": "CP_DMA_WORD0" }, { "chips": ["gfx6"], - "map": {"at": 1041, "to": "pkt3"}, + "map": {"at": 1042, "to": "pkt3"}, "name": "CP_DMA_WORD1", "type_ref": "CP_DMA_WORD1" }, { "chips": ["gfx7", "gfx8", "gfx81"], - "map": {"at": 1041, "to": "pkt3"}, + "map": {"at": 1042, "to": "pkt3"}, "name": "CP_DMA_WORD1", "type_ref": "CP_DMA_WORD1_cik" }, { - "chips": ["gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1041, "to": "pkt3"}, - "name": "CP_DMA_WORD1", - "type_ref": "CP_DMA_WORD1_gfx9" - }, - { - "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1042, "to": "pkt3"}, + "chips": ["gfx6", "gfx7", "gfx8", "gfx81"], + "map": {"at": 1043, "to": "pkt3"}, "name": "CP_DMA_WORD2", "type_ref": "CP_DMA_WORD2" }, { - "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1043, "to": "pkt3"}, + "chips": ["gfx6", "gfx7", "gfx8", "gfx81"], + "map": {"at": 1044, "to": "pkt3"}, "name": "CP_DMA_WORD3", "type_ref": "CP_DMA_WORD3" }, @@ -279,128 +146,58 @@ "name": "DMA_DATA_WORD0", "type_ref": "DMA_DATA_WORD0_cik" }, - { - "chips": ["gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1281, "to": "pkt3"}, - "name": "DMA_DATA_WORD0", - "type_ref": "DMA_DATA_WORD0_gfx9" - }, - { - "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 882, "to": "pkt3"}, - "name": "DST_ADDR_HI" - }, - { - "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1286, "to": "pkt3"}, - "name": "DST_ADDR_HI" - }, - { - "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 881, "to": "pkt3"}, - "name": "DST_ADDR_LO" - }, - { - "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1285, "to": "pkt3"}, - "name": "DST_ADDR_LO" - }, { "chips": ["gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1414, "to": "pkt3"}, + "map": {"at": 1415, "to": "pkt3"}, "name": "GCR_CNTL", "type_ref": "GCR_CNTL" }, { "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1009, "to": "pkt3"}, + "map": {"at": 1010, "to": "pkt3"}, "name": "IB_BASE_HI" }, { "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1008, "to": "pkt3"}, + "map": {"at": 1009, "to": "pkt3"}, "name": "IB_BASE_LO" }, { "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1010, "to": "pkt3"}, + "map": {"at": 1011, "to": "pkt3"}, "name": "IB_CONTROL", "type_ref": "IB_CONTROL" }, { "chips": ["gfx10", "gfx103"], - "map": {"at": 1168, "to": "pkt3"}, + "map": {"at": 1169, "to": "pkt3"}, "name": "RELEASE_MEM_OP", "type_ref": "RELEASE_MEM_OP" }, { "chips": ["gfx11", "gfx12"], - "map": {"at": 1168, "to": "pkt3"}, + "map": {"at": 1169, "to": "pkt3"}, "name": "RELEASE_MEM_OP", "type_ref": "RELEASE_MEM_OP_gfx11" - }, - { - "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1283, "to": "pkt3"}, - "name": "SRC_ADDR_HI" - }, - { - "chips": ["gfx6", "gfx7", "gfx8", "gfx81", "gfx9", "gfx940", "gfx10", "gfx103", "gfx11", "gfx12"], - "map": {"at": 1282, "to": "pkt3"}, - "name": "SRC_ADDR_LO" - }, - { - "chips": ["gfx11", "gfx12"], - "map": {"at": 1408, "to": "pkt3"}, - "name": "ACQUIRE_MEM_PWS_2", - "type_ref": "ACQUIRE_MEM_PWS_2" - }, - { - "chips": ["gfx11", "gfx12"], - "map": {"at": 1413, "to": "pkt3"}, - "name": "ACQUIRE_MEM_PWS_7", - "type_ref": "ACQUIRE_MEM_PWS_7" } ], "register_types": { - "COMMAND": { + "CP_DMA_COMMAND": { "fields": [ {"bits": [0, 20], "name": "BYTE_COUNT"}, {"bits": [21, 21], "name": "DISABLE_WR_CONFIRM"}, - {"bits": [22, 23], "enum_ref": "COMMAND__SRC_SWAP", "name": "SRC_SWAP"}, - {"bits": [24, 25], "enum_ref": "COMMAND__SRC_SWAP", "name": "DST_SWAP"}, - {"bits": [26, 26], "enum_ref": "COMMAND__SAS", "name": "SAS"}, - {"bits": [27, 27], "enum_ref": "COMMAND__SAS", "name": "DAS"}, - {"bits": [28, 28], "enum_ref": "COMMAND__SAIC", "name": "SAIC"}, - {"bits": [29, 29], "enum_ref": "COMMAND__SAIC", "name": "DAIC"}, + {"bits": [22, 23], "enum_ref": "CP_DMA_COMMAND__SRC_SWAP", "name": "SRC_SWAP"}, + {"bits": [24, 25], "enum_ref": "CP_DMA_COMMAND__SRC_SWAP", "name": "DST_SWAP"}, + {"bits": [26, 26], "enum_ref": "CP_DMA_COMMAND__SAS", "name": "SAS"}, + {"bits": [27, 27], "enum_ref": "CP_DMA_COMMAND__SAS", "name": "DAS"}, + {"bits": [28, 28], "enum_ref": "CP_DMA_COMMAND__SAIC", "name": "SAIC"}, + {"bits": [29, 29], "enum_ref": "CP_DMA_COMMAND__SAIC", "name": "DAIC"}, {"bits": [30, 30], "name": "RAW_WAIT"} ] }, - "COMMAND_gfx9": { + "WRITE_DATA_CONTROL": { "fields": [ - {"bits": [0, 25], "name": "BYTE_COUNT"}, - {"bits": [26, 26], "enum_ref": "COMMAND__SAS", "name": "SAS"}, - {"bits": [27, 27], "enum_ref": "COMMAND__SAS", "name": "DAS"}, - {"bits": [28, 28], "enum_ref": "COMMAND__SAIC", "name": "SAIC"}, - {"bits": [29, 29], "enum_ref": "COMMAND__SAIC", "name": "DAIC"}, - {"bits": [30, 30], "name": "RAW_WAIT"}, - {"bits": [31, 31], "name": "DISABLE_WR_CONFIRM"} - ] - }, - "CONTROL": { - "fields": [ - {"bits": [8, 11], "enum_ref": "CONTROL__DST_SEL", "name": "DST_SEL"}, - {"bits": [16, 16], "name": "WR_ONE_ADDR"}, - {"bits": [20, 20], "name": "WR_CONFIRM"}, - {"bits": [30, 31], "enum_ref": "CONTROL__ENGINE_SEL", "name": "ENGINE_SEL"} - ] - }, - "CONTROL_cik": { - "fields": [ - {"bits": [8, 11], "enum_ref": "CONTROL__DST_SEL_cik", "name": "DST_SEL"}, - {"bits": [16, 16], "name": "WR_ONE_ADDR"}, - {"bits": [20, 20], "name": "WR_CONFIRM"}, - {"bits": [30, 31], "enum_ref": "CONTROL__ENGINE_SEL", "name": "ENGINE_SEL"} + {"bits": [8, 11], "enum_ref": "WRITE_DATA_CONTROL__DST_SEL", "name": "DST_SEL"} ] }, "CP_DMA_WORD0": { @@ -426,15 +223,6 @@ {"bits": [31, 31], "name": "CP_SYNC"} ] }, - "CP_DMA_WORD1_gfx9": { - "fields": [ - {"bits": [0, 15], "name": "SRC_ADDR_HI"}, - {"bits": [20, 21], "enum_ref": "CP_DMA_WORD1__DST_SEL_gfx9", "name": "DST_SEL"}, - {"bits": [27, 27], "enum_ref": "CP_DMA_WORD1__ENGINE", "name": "ENGINE"}, - {"bits": [29, 30], "enum_ref": "CP_DMA_WORD1__SRC_SEL_cik", "name": "SRC_SEL"}, - {"bits": [31, 31], "name": "CP_SYNC"} - ] - }, "CP_DMA_WORD2": { "fields": [ {"bits": [0, 31], "name": "DST_ADDR_LO"} @@ -463,16 +251,6 @@ {"bits": [31, 31], "name": "CP_SYNC"} ] }, - "DMA_DATA_WORD0_gfx9": { - "fields": [ - {"bits": [0, 0], "enum_ref": "CP_DMA_WORD1__ENGINE", "name": "ENGINE"}, - {"bits": [13, 14], "name": "SRC_CACHE_POLICY"}, - {"bits": [20, 21], "enum_ref": "CP_DMA_WORD1__DST_SEL_gfx9", "name": "DST_SEL"}, - {"bits": [25, 26], "name": "DST_CACHE_POLICY"}, - {"bits": [29, 30], "enum_ref": "CP_DMA_WORD1__SRC_SEL_cik", "name": "SRC_SEL"}, - {"bits": [31, 31], "name": "CP_SYNC"} - ] - }, "GCR_CNTL": { "fields": [ {"bits": [0, 1], "enum_ref": "GCR_GLI_INV", "name": "GLI_INV"}, @@ -502,8 +280,6 @@ }, "RELEASE_MEM_OP": { "fields": [ - {"bits": [0, 5], "name": "EVENT_TYPE"}, - {"bits": [8, 11], "name": "EVENT_INDEX"}, {"bits": [12, 12], "name": "GLM_WB"}, {"bits": [13, 13], "name": "GLM_INV"}, {"bits": [14, 14], "name": "GLV_INV"}, @@ -518,9 +294,6 @@ }, "RELEASE_MEM_OP_gfx11": { "fields": [ - {"bits": [0, 5], "enum_ref": "VGT_EVENT_TYPE_gfx11", "name": "EVENT_TYPE"}, - {"bits": [7, 7], "name": "WAIT_SYNC"}, - {"bits": [8, 11], "name": "EVENT_INDEX"}, {"bits": [12, 12], "name": "GLM_WB"}, {"bits": [13, 13], "name": "GLM_INV"}, {"bits": [14, 14], "name": "GLV_INV"}, @@ -531,24 +304,7 @@ {"bits": [20, 20], "name": "GL2_INV"}, {"bits": [21, 21], "name": "GL2_WB"}, {"bits": [22, 23], "enum_ref": "GCR_SEQ", "name": "SEQ"}, - {"bits": [24, 24], "name": "GLK_WB"}, - {"bits": [25, 26], "name": "CACHE_POLICY"}, - {"bits": [28, 29], "name": "EXECUTE"}, - {"bits": [30, 30], "name": "GLK_INV"}, - {"bits": [31, 31], "name": "PWS_ENABLE"} - ] - }, - "ACQUIRE_MEM_PWS_2": { - "fields": [ - {"bits": [11, 13], "enum_ref": "PWS_STAGE_SEL", "name": "PWS_STAGE_SEL"}, - {"bits": [14, 15], "enum_ref": "PWS_COUNTER_SEL", "name": "PWS_COUNTER_SEL"}, - {"bits": [17, 17], "name": "PWS_ENA2"}, - {"bits": [18, 23], "name": "PWS_COUNT"} - ] - }, - "ACQUIRE_MEM_PWS_7": { - "fields": [ - {"bits": [31, 31], "name": "PWS_ENA"} + {"bits": [24, 24], "name": "GLK_WB"} ] } } diff --git a/src/amd/vulkan/meta/radv_meta_buffer.c b/src/amd/vulkan/meta/radv_meta_buffer.c index 66f3889e392..0aaaec1a6af 100644 --- a/src/amd/vulkan/meta/radv_meta_buffer.c +++ b/src/amd/vulkan/meta/radv_meta_buffer.c @@ -437,7 +437,7 @@ radv_update_memory_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const voi radv_emit_cache_flush(cmd_buffer); radeon_check_space(device->ws, cs->b, words + 4); - ac_emit_cp_write_data(cs->b, V_370_ME, mec ? V_370_MEM : V_370_MEM_GRBM, va, words, data, false); + ac_emit_cp_write_data(cs->b, V_371_MICRO_ENGINE, mec ? V_371_MEMORY : V_371_MEM_GRBM, va, words, data, false); if (radv_device_fault_detection_enabled(device)) radv_cmd_buffer_trace_emit(cmd_buffer); diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index b8da4f70fba..cd119816aae 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1439,7 +1439,7 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) va += offsetof(struct radv_trace_data, secondary_id); ++cmd_buffer->state.trace_id; - radv_write_data(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id, false); + radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 1, &cmd_buffer->state.trace_id, false); radeon_check_space(device->ws, cs->b, 2); @@ -1666,10 +1666,10 @@ radv_gang_finalize(struct radv_cmd_buffer *cmd_buffer) const uint32_t zero = 0; /* Follower: write 0 to the leader->follower semaphore. */ - radv_cs_write_data(device, ace_cs, V_370_ME, leader2follower_va, 1, &zero, false); + radv_cs_write_data(device, ace_cs, V_371_MICRO_ENGINE, leader2follower_va, 1, &zero, false); /* Leader: write 0 to the follower->leader semaphore. */ - radv_write_data(cmd_buffer, V_370_ME, follower2leader_va, 1, &zero, false); + radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, follower2leader_va, 1, &zero, false); } return radv_finalize_cmd_stream(device, cmd_buffer->gang.cs); @@ -1747,7 +1747,7 @@ radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pip data[0] = pipeline_address; data[1] = pipeline_address >> 32; - radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false); + radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 2, data, false); } static void @@ -1762,7 +1762,7 @@ radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr data[0] = vb_ptr; data[1] = vb_ptr >> 32; - radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false); + radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 2, data, false); } static void @@ -1778,7 +1778,7 @@ radv_save_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader data[0] = prolog_address; data[1] = prolog_address >> 32; - radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false); + radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 2, data, false); } static void @@ -1794,7 +1794,7 @@ radv_save_ps_epilog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader data[0] = epilog_address; data[1] = epilog_address >> 32; - radv_write_data(cmd_buffer, V_370_ME, va, 2, data, false); + radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 2, data, false); } void @@ -1824,7 +1824,7 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bi data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; } - radv_write_data(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data, false); + radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, MAX_SETS * 2, data, false); } static void @@ -5048,7 +5048,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image /* Use the fastest way when both aspects are used. */ ASSERTED unsigned cdw_end = - radv_cs_write_data_head(device, cs, V_370_ME, va, 2 * level_count, cmd_buffer->state.predicating); + radv_cs_write_data_head(device, cs, V_371_MICRO_ENGINE, va, 2 * level_count, cmd_buffer->state.predicating); radeon_begin(cs); @@ -5073,7 +5073,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image value = ds_clear_value.stencil; } - radv_write_data(cmd_buffer, V_370_ME, va, 1, &value, cmd_buffer->state.predicating); + radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 1, &value, cmd_buffer->state.predicating); } } @@ -5096,7 +5096,7 @@ radv_update_hiz_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image * const uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); ASSERTED unsigned cdw_end = - radv_cs_write_data_head(device, cs, V_370_PFP, va, level_count, cmd_buffer->state.predicating); + radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, level_count, cmd_buffer->state.predicating); radeon_begin(cs); for (uint32_t l = 0; l < level_count; l++) @@ -5123,7 +5123,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct ra uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); ASSERTED unsigned cdw_end = - radv_cs_write_data_head(device, cs, V_370_PFP, va, level_count, cmd_buffer->state.predicating); + radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, level_count, cmd_buffer->state.predicating); radeon_begin(cs); @@ -5224,7 +5224,7 @@ radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image * uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel); uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); - ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, V_370_PFP, va, 2 * level_count, false); + ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, 2 * level_count, false); radeon_begin(cs); @@ -5256,7 +5256,7 @@ radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image * assert(radv_dcc_enabled(image, range->baseMipLevel)); - ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, V_370_PFP, va, 2 * level_count, false); + ASSERTED unsigned cdw_end = radv_cs_write_data_head(device, cs, V_371_PREFETCH_PARSER, va, 2 * level_count, false); radeon_begin(cs); @@ -5314,7 +5314,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_im uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel); ASSERTED unsigned cdw_end = - radv_cs_write_data_head(device, cs, V_370_ME, va, 2 * level_count, cmd_buffer->state.predicating); + radv_cs_write_data_head(device, cs, V_371_MICRO_ENGINE, va, 2 * level_count, cmd_buffer->state.predicating); radeon_begin(cs); @@ -7792,7 +7792,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); cmd_buffer->gfx9_fence_va += fence_offset; - radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_fence_va, 8); + radv_emit_clear_data(cmd_buffer, V_371_PREFETCH_PARSER, cmd_buffer->gfx9_fence_va, 8); if (pdev->info.gfx_level == GFX9) { /* Allocate a buffer for the EOP bug on GFX9. */ @@ -7805,7 +7805,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); cmd_buffer->gfx9_eop_bug_va += eop_bug_offset; - radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_eop_bug_va, 16 * num_db); + radv_emit_clear_data(cmd_buffer, V_371_PREFETCH_PARSER, cmd_buffer->gfx9_eop_bug_va, 16 * num_db); } } @@ -10653,7 +10653,7 @@ radv_gfx12_emit_wa(const struct radv_device *device, const struct radv_cmd_state assert(pdev->info.gfx_level == GFX12); radeon_begin(cs); radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); - radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5)); + radeon_emit(S_491_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_491_EVENT_INDEX(5)); radeon_emit(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */ radeon_emit(0); /* ADDRESS_LO */ radeon_emit(0); /* ADDRESS_HI */ @@ -15246,10 +15246,10 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipe if (!(stageMask & ~top_of_pipe_flags) && cmd_buffer->qf != RADV_QUEUE_COMPUTE) { /* Just need to sync the PFP engine. */ - radv_write_data(cmd_buffer, V_370_PFP, va, 1, &value, false); + radv_write_data(cmd_buffer, V_371_PREFETCH_PARSER, va, 1, &value, false); } else if (!(stageMask & ~post_index_fetch_flags)) { /* Sync ME because PFP reads index and indirect buffers. */ - radv_write_data(cmd_buffer, V_370_ME, va, 1, &value, false); + radv_write_data(cmd_buffer, V_371_MICRO_ENGINE, va, 1, &value, false); } else { unsigned event_type; diff --git a/src/amd/vulkan/radv_cp_dma.c b/src/amd/vulkan/radv_cp_dma.c index 9275a90542c..495c709bd46 100644 --- a/src/amd/vulkan/radv_cp_dma.c +++ b/src/amd/vulkan/radv_cp_dma.c @@ -34,9 +34,7 @@ static inline unsigned cp_dma_max_byte_count(enum amd_gfx_level gfx_level) { - unsigned max = gfx_level >= GFX11 ? 32767 - : gfx_level >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) - : S_415_BYTE_COUNT_GFX6(~0u); + unsigned max = gfx_level >= GFX11 ? 32767 : gfx_level >= GFX9 ? S_506_BYTE_COUNT(~0u) : S_415_BYTE_COUNT(~0u); /* make it aligned for optimal performance */ return max & ~(SI_CPDMA_ALIGNMENT - 1); @@ -61,25 +59,25 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool radeon_check_space(device->ws, cs->b, 9); if (pdev->info.gfx_level >= GFX9) - command |= S_415_BYTE_COUNT_GFX9(size); + command |= S_506_BYTE_COUNT(size); else - command |= S_415_BYTE_COUNT_GFX6(size); + command |= S_415_BYTE_COUNT(size); /* Sync flags. */ if (flags & CP_DMA_SYNC) - header |= S_411_CP_SYNC(1); + header |= S_501_CP_SYNC(1); if (flags & CP_DMA_RAW_WAIT) - command |= S_415_RAW_WAIT(1); + command |= S_506_RAW_WAIT(1); /* Src and dst flags. */ if (cp_dma_tc_l2_flag) - header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); + header |= S_501_DST_SEL(V_501_DST_ADDR_USING_L2); if (flags & CP_DMA_CLEAR) - header |= S_411_SRC_SEL(V_411_DATA); + header |= S_501_SRC_SEL(V_501_DATA); else if (cp_dma_tc_l2_flag) - header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); + header |= S_501_SRC_SEL(V_501_SRC_ADDR_USING_L2); radeon_begin(cs); if (pdev->info.gfx_level >= GFX7) { @@ -92,7 +90,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool radeon_emit(command); } else { assert(!cp_dma_tc_l2_flag); - header |= S_411_SRC_ADDR_HI(src_va >> 32); + header |= S_412_SRC_ADDR_HI(src_va >> 32); radeon_emit(PKT3(PKT3_CP_DMA, 4, predicating)); radeon_emit(src_va); /* SRC_ADDR_LO [31:0] */ radeon_emit(header); /* SRC_ADDR_HI [15:0] + flags. */ @@ -168,14 +166,14 @@ radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radv_cmd_stream uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va; if (gfx_level >= GFX9) { - command |= S_415_BYTE_COUNT_GFX9(aligned_size) | S_415_DISABLE_WR_CONFIRM_GFX9(1); - header |= S_411_DST_SEL(V_411_NOWHERE); + command |= S_506_BYTE_COUNT(aligned_size) | S_506_DISABLE_WR_CONFIRM(1); + header |= S_501_DST_SEL(V_501_DST_NOWHERE); } else { - command |= S_415_BYTE_COUNT_GFX6(aligned_size) | S_415_DISABLE_WR_CONFIRM_GFX6(1); - header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); + command |= S_415_BYTE_COUNT(aligned_size) | S_415_DISABLE_WR_CONFIRM(1); + header |= S_501_DST_SEL(V_501_DST_ADDR_USING_L2); } - header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); + header |= S_501_SRC_SEL(V_501_SRC_ADDR_USING_L2); radeon_begin(cs); radeon_emit(PKT3(PKT3_DMA_DATA, 5, predicating)); diff --git a/src/amd/vulkan/radv_cs.c b/src/amd/vulkan/radv_cs.c index 578d65971b9..71d8ccccdd4 100644 --- a/src/amd/vulkan/radv_cs.c +++ b/src/amd/vulkan/radv_cs.c @@ -45,43 +45,43 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev assert(!(flush_bits & (RADV_CMD_FLAG_VGT_STREAMOUT_SYNC))); if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) { - gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL); + gcr_cntl |= S_587_GLI_INV(V_587_GLI_ALL); *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE; } if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) { - gcr_cntl |= S_586_GLK_INV(1); + gcr_cntl |= S_587_GLK_INV(1); *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0; } if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) { - gcr_cntl |= S_586_GLV_INV(1); + gcr_cntl |= S_587_GLV_INV(1); *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0; } if (flush_bits & (RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE) && gfx_level < GFX12) { - gcr_cntl |= S_586_GL1_INV(1); + gcr_cntl |= S_587_GL1_INV(1); *sqtt_flush_bits |= RGP_FLUSH_INVAL_L1; } if (flush_bits & RADV_CMD_FLAG_INV_L2) { /* Writeback and invalidate everything in L2. */ - gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1); + gcr_cntl |= S_587_GL2_INV(1) | S_587_GL2_WB(1); *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2; } else if (flush_bits & RADV_CMD_FLAG_WB_L2) { /* Writeback but do not invalidate. * GLM doesn't support WB alone. If WB is set, INV must be set too. */ - gcr_cntl |= S_586_GL2_WB(1); + gcr_cntl |= S_587_GL2_WB(1); *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2; } if (gfx_level < GFX12 && (flush_bits & (RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_INV_L2_METADATA))) { - gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1); + gcr_cntl |= S_587_GLM_INV(1) | S_587_GLM_WB(1); } if (flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) { @@ -107,7 +107,7 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev } /* First flush CB/DB, then L1/L2. */ - gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD); + gcr_cntl |= S_587_SEQ(V_587_SEQ_FORWARD); if ((flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) == (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) { @@ -153,13 +153,13 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev /* Send an event that flushes caches. */ ac_emit_cp_release_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, gcr_cntl); - gcr_cntl &= C_586_GLK_WB & C_586_GLK_INV & C_586_GLV_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ + gcr_cntl &= C_587_GLK_WB & C_587_GLK_INV & C_587_GLV_INV & C_587_GL2_INV & C_587_GL2_WB; /* keep SEQ */ if (gfx_level < GFX12) - gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GL1_INV; + gcr_cntl &= C_587_GLM_WB & C_587_GLM_INV & C_587_GL1_INV; /* Wait for the event and invalidate remaining caches if needed. */ - ac_emit_cp_acquire_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, V_580_CP_PFP, 0, gcr_cntl); + ac_emit_cp_acquire_mem_pws(cs->b, gfx_level, cs->hw_ip, cb_db_event, V_581B_CP_PFP, 0, gcr_cntl); gcr_cntl = 0; /* all done */ } else { @@ -172,27 +172,27 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev * implied). */ /* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */ - unsigned glm_wb = G_586_GLM_WB(gcr_cntl); - unsigned glm_inv = G_586_GLM_INV(gcr_cntl); - unsigned glv_inv = G_586_GLV_INV(gcr_cntl); - unsigned gl1_inv = G_586_GL1_INV(gcr_cntl); - assert(G_586_GL2_US(gcr_cntl) == 0); - assert(G_586_GL2_RANGE(gcr_cntl) == 0); - assert(G_586_GL2_DISCARD(gcr_cntl) == 0); - unsigned gl2_inv = G_586_GL2_INV(gcr_cntl); - unsigned gl2_wb = G_586_GL2_WB(gcr_cntl); - unsigned gcr_seq = G_586_SEQ(gcr_cntl); + unsigned glm_wb = G_587_GLM_WB(gcr_cntl); + unsigned glm_inv = G_587_GLM_INV(gcr_cntl); + unsigned glv_inv = G_587_GLV_INV(gcr_cntl); + unsigned gl1_inv = G_587_GL1_INV(gcr_cntl); + assert(G_587_GL2_US(gcr_cntl) == 0); + assert(G_587_GL2_RANGE(gcr_cntl) == 0); + assert(G_587_GL2_DISCARD(gcr_cntl) == 0); + unsigned gl2_inv = G_587_GL2_INV(gcr_cntl); + unsigned gl2_wb = G_587_GL2_WB(gcr_cntl); + unsigned gcr_seq = G_587_SEQ(gcr_cntl); gcr_cntl &= - C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ + C_587_GLM_WB & C_587_GLM_INV & C_587_GLV_INV & C_587_GL1_INV & C_587_GL2_INV & C_587_GL2_WB; /* keep SEQ */ assert(flush_cnt); (*flush_cnt)++; radv_cs_emit_write_event_eop( cs, gfx_level, cb_db_event, - S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) | S_490_GL1_INV(gl1_inv) | - S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | S_490_SEQ(gcr_seq), + S_491_GLM_WB(glm_wb) | S_491_GLM_INV(glm_inv) | S_491_GLV_INV(glv_inv) | S_491_GL1_INV(gl1_inv) | + S_491_GL2_INV(gl2_inv) | S_491_GL2_WB(gl2_wb) | S_491_SEQ(gcr_seq), EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, 0); radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff); @@ -207,8 +207,8 @@ gfx10_cs_emit_cache_flush(struct radv_cmd_stream *cs, enum amd_gfx_level gfx_lev } /* Ignore fields that only modify the behavior of other fields. */ - if (gcr_cntl & C_586_GL2_RANGE & C_586_SEQ & (gfx_level >= GFX12 ? ~0 : C_586_GL1_RANGE)) { - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, gcr_cntl); + if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) { + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, gcr_cntl); } else if ((cb_db_event || (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) && !is_mec) { @@ -392,7 +392,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e } if ((flush_bits & RADV_CMD_FLAG_INV_L2) || (gfx_level <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) { - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | S_0301F0_TC_WB_ACTION_ENA(gfx_level >= GFX8)); cp_coher_cntl = 0; @@ -406,14 +406,15 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e * * WB doesn't work without NC. */ - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); cp_coher_cntl = 0; *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0; } if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) { - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, + cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); cp_coher_cntl = 0; *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0; @@ -424,7 +425,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radv_cmd_stream *cs, e * Therefore, it should be last. Done in PFP. */ if (cp_coher_cntl) - ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_580_CP_PFP, cp_coher_cntl); + ac_emit_cp_acquire_mem(cs->b, gfx_level, cs->hw_ip, V_581B_CP_PFP, cp_coher_cntl); radeon_begin(cs); diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index 894b977c1df..8dc71b13a0c 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -194,7 +194,7 @@ radv_cs_write_data_head(const struct radv_device *device, struct radv_cmd_stream const unsigned cdw_end = radeon_check_space(device->ws, cs->b, 4 + count); if (cs->hw_ip == AMD_IP_COMPUTE || cs->hw_ip == AMD_IP_GFX) { - ac_emit_cp_write_data_head(cs->b, engine_sel, V_370_MEM, va, count, predicating); + ac_emit_cp_write_data_head(cs->b, engine_sel, V_371_MEMORY, va, count, predicating); } else if (cs->hw_ip == AMD_IP_SDMA) { ac_emit_sdma_write_data_head(cs->b, va, count); } else { diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index cbe173b9e8b..573e6ba5cba 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -1017,7 +1017,7 @@ dgc_emit_indirect_buffer(struct dgc_cmdbuf *cs, nir_def *va, nir_def *ib_offset, nir_imm_int(b, PKT3(PKT3_INDIRECT_BUFFER, 2, 0)), nir_iadd(b, load_param32(b, upload_addr), ib_offset), nir_imm_int(b, pdev->info.address32_hi), - nir_ior_imm(b, ib_cdw, S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(false)), + nir_ior_imm(b, ib_cdw, S_3F3_CHAIN(1) | S_3F3_VALID(1) | S_3F3_PRE_ENA(false)), }; nir_store_global(b, nir_vec(b, packet, 4), va, .access = ACCESS_NON_READABLE); @@ -1277,7 +1277,7 @@ dgc_gfx12_emit_hiz_wa(struct dgc_cmdbuf *cs) if (pdev->gfx12_hiz_wa == RADV_GFX12_HIZ_WA_PARTIAL) { dgc_cs_begin(cs); dgc_cs_emit_imm(PKT3(PKT3_RELEASE_MEM, 6, 0)); - dgc_cs_emit_imm(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5)); + dgc_cs_emit_imm(S_491_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_491_EVENT_INDEX(5)); dgc_cs_emit_imm(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */ dgc_cs_emit_imm(0); /* ADDRESS_LO */ dgc_cs_emit_imm(0); /* ADDRESS_HI */ diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index 97e9eeededa..20cb76c1178 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -602,7 +602,7 @@ radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query if (end) { uint64_t signal_va = va + pool->b.stride - 8 - 8 * pass; - ac_emit_cp_write_data_imm(cs->b, V_370_ME, signal_va, 1); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, signal_va, 1); } *skip_dwords = cs->b->buf + cs->b->cdw - skip_dwords - 1; @@ -630,7 +630,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo radv_cs_add_buffer(device->ws, cs->b, device->perf_counter_bo); uint64_t perf_ctr_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; - ac_emit_cp_write_data_imm(cs->b, V_370_ME, perf_ctr_va, 0); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, perf_ctr_va, 0); radv_pc_wait_idle(cmd_buffer); radv_perfcounter_emit_reset(cs); diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 8516e42fecc..5308ba88b06 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -671,7 +671,7 @@ radv_begin_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_q radeon_check_space(device->ws, ace_cs->b, 11); gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset); - ac_emit_cp_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000); + ac_emit_cp_write_data_imm(ace_cs->b, V_371_MICRO_ENGINE, va + task_invoc_offset + 4, 0x80000000); /* Record that the command buffer needs GDS. */ cmd_buffer->gds_needed = true; @@ -745,7 +745,7 @@ radv_end_pipeline_stat_query(struct radv_cmd_buffer *cmd_buffer, struct radv_que radeon_check_space(device->ws, ace_cs->b, 11); gfx10_copy_shader_query_ace(cmd_buffer, RADV_SHADER_QUERY_TS_INVOCATION_OFFSET, va + task_invoc_offset); - ac_emit_cp_write_data_imm(ace_cs->b, V_370_ME, va + task_invoc_offset + 4, 0x80000000); + ac_emit_cp_write_data_imm(ace_cs->b, V_371_MICRO_ENGINE, va + task_invoc_offset + 4, 0x80000000); cmd_buffer->state.active_pipeline_ace_queries--; @@ -970,11 +970,11 @@ radv_begin_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t i if (pdev->use_ngg_streamout) { /* generated prim counter */ gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 4, 0x80000000); /* written prim counter */ gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 8); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 12, 0x80000000); if (!cmd_buffer->state.active_emulated_prims_xfb_queries) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY; @@ -999,11 +999,11 @@ radv_end_tfb_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint32_t ind if (pdev->use_ngg_streamout) { /* generated prim counter */ gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 20, 0x80000000); /* written prim counter */ gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(index), va + 24); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 28, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 28, 0x80000000); cmd_buffer->state.active_emulated_prims_xfb_queries--; @@ -1339,7 +1339,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool * if (pdev->info.gfx_level >= GFX11) { /* On GFX11+, primitives generated query are always emulated. */ gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 4, 0x80000000); if (!cmd_buffer->state.active_emulated_prims_gen_queries) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_SHADER_QUERY; @@ -1363,7 +1363,7 @@ radv_begin_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool * if (pool->uses_emulated_queries) { /* generated prim counter */ gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 32); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 36, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 36, 0x80000000); /* Record that the command buffer needs GDS. */ cmd_buffer->gds_needed = true; @@ -1388,7 +1388,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po if (pdev->info.gfx_level >= GFX11) { /* On GFX11+, primitives generated query are always emulated. */ gfx10_copy_shader_query_gfx(cmd_buffer, false, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 20, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 20, 0x80000000); cmd_buffer->state.active_emulated_prims_gen_queries--; @@ -1412,7 +1412,7 @@ radv_end_pg_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *po if (pool->uses_emulated_queries) { /* generated prim counter */ gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 40); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 44, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 44, 0x80000000); cmd_buffer->state.active_emulated_prims_gen_queries--; @@ -1590,7 +1590,7 @@ radv_begin_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va) radv_emit_event_write(&pdev->info, cs, RADV_EVENT_WRITE_PIPELINE_STAT, va); } else { gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 4, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 4, 0x80000000); /* Record that the command buffer needs GDS. */ cmd_buffer->gds_needed = true; @@ -1627,7 +1627,7 @@ radv_end_ms_prim_query(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t cmd_buffer->gfx9_eop_bug_va); } else { gfx10_copy_shader_query_gfx(cmd_buffer, true, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8); - ac_emit_cp_write_data_imm(cs->b, V_370_ME, va + 12, 0x80000000); + ac_emit_cp_write_data_imm(cs->b, V_371_MICRO_ENGINE, va + 12, 0x80000000); cmd_buffer->state.active_emulated_prims_gen_queries--; diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index 67bb0b4eb9a..1074d395865 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -615,7 +615,7 @@ radv_emit_ge_rings(struct radv_device *device, struct radv_cmd_stream *cs, struc ac_emit_cp_release_mem_pws(cs->b, pdev->info.gfx_level, AMD_IP_GFX, V_028A90_BOTTOM_OF_PIPE_TS, 0); /* Wait for the PWS counter. */ - ac_emit_cp_acquire_mem_pws(cs->b, pdev->info.gfx_level, AMD_IP_GFX, V_028A90_BOTTOM_OF_PIPE_TS, V_580_CP_ME, 0, 0); + ac_emit_cp_acquire_mem_pws(cs->b, pdev->info.gfx_level, AMD_IP_GFX, V_028A90_BOTTOM_OF_PIPE_TS, V_581B_CP_ME, 0, 0); ac_emit_cp_gfx11_ge_rings(cs->b, &pdev->info, va, pdev->gfx12_hiz_wa == RADV_GFX12_HIZ_WA_PARTIAL); } @@ -1415,8 +1415,8 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) * meant to be executed on multiple compute engines at the same time. */ radv_cp_wait_mem(ace_pre_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace_wait_va, 1, 0xffffffff); - radv_cs_write_data(device, ace_pre_cs, V_370_ME, ace_wait_va, 1, &zero, false); - radv_cs_write_data(device, leader_pre_cs, V_370_ME, ace_wait_va, 1, &one, false); + radv_cs_write_data(device, ace_pre_cs, V_371_MICRO_ENGINE, ace_wait_va, 1, &zero, false); + radv_cs_write_data(device, leader_pre_cs, V_371_MICRO_ENGINE, ace_wait_va, 1, &one, false); /* Create postambles for gang submission. * This ensures that the gang leader waits for the whole gang, * which is necessary because the kernel signals the userspace fence @@ -1424,7 +1424,7 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) * same command buffers could be submitted again while still being executed. */ radv_cp_wait_mem(leader_post_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff); - radv_cs_write_data(device, leader_post_cs, V_370_ME, leader_wait_va, 1, &zero, false); + radv_cs_write_data(device, leader_post_cs, V_371_MICRO_ENGINE, leader_wait_va, 1, &zero, false); radv_cs_emit_write_event_eop(ace_post_cs, pdev->info.gfx_level, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, leader_wait_va, 1, 0); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index a04824848e9..86486b9e10e 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -411,7 +411,7 @@ radv_amdgpu_cs_grow(struct ac_cmdbuf *_cs, size_t min_size) uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2); - ib_size = align(MIN2(ib_size, ~C_3F2_IB_SIZE), ib_alignment); + ib_size = align(MIN2(ib_size, ~C_3F3_IB_SIZE), ib_alignment); VkResult result = radv_amdgpu_cs_bo_create(cs, ib_size); @@ -437,7 +437,7 @@ radv_amdgpu_cs_grow(struct ac_cmdbuf *_cs, size_t min_size) cs->base.buf[cs->base.cdw - 4] = PKT3(PKT3_INDIRECT_BUFFER, 2, 0); cs->base.buf[cs->base.cdw - 3] = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va; cs->base.buf[cs->base.cdw - 2] = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va >> 32; - cs->base.buf[cs->base.cdw - 1] = S_3F2_CHAIN(1) | S_3F2_VALID(1); + cs->base.buf[cs->base.cdw - 1] = S_3F3_CHAIN(1) | S_3F3_VALID(1); cs->ib_size_ptr = cs->base.buf + cs->base.cdw - 1; } @@ -490,7 +490,7 @@ radv_amdgpu_cs_finalize(struct ac_cmdbuf *_cs) /* Emit 4 dwords of NOP, these will be replaced by the chaining INDIRECT_BUFFER. */ radv_amdgpu_cs_emit_nops(cs, 4); - assert(cs->base.cdw <= ~C_3F2_IB_SIZE); + assert(cs->base.cdw <= ~C_3F3_IB_SIZE); *cs->ib_size_ptr |= cs->base.cdw; } else { radv_amdgpu_winsys_cs_pad(_cs, 0); @@ -498,7 +498,7 @@ radv_amdgpu_cs_finalize(struct ac_cmdbuf *_cs) /* Append the current (last) IB to the array of IB buffers. */ radv_amdgpu_cs_add_ib_buffer(cs, cs->ib_buffer, cs->ib_buffer->va, - cs->chain_ib ? G_3F2_IB_SIZE(*cs->ib_size_ptr) : cs->base.cdw); + cs->chain_ib ? G_3F3_IB_SIZE(*cs->ib_size_ptr) : cs->base.cdw); /* Prevent freeing this BO twice. */ cs->ib_buffer = NULL; @@ -587,7 +587,7 @@ radv_amdgpu_cs_chain(struct ac_cmdbuf *cs, struct ac_cmdbuf *next_cs, bool pre_e cs->buf[cs->cdw - 4] = PKT3(PKT3_INDIRECT_BUFFER, 2, 0); cs->buf[cs->cdw - 3] = next_acs->ib.ib_mc_address; cs->buf[cs->cdw - 2] = next_acs->ib.ib_mc_address >> 32; - cs->buf[cs->cdw - 1] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(pre_ena) | next_acs->ib.size; + cs->buf[cs->cdw - 1] = S_3F3_CHAIN(1) | S_3F3_VALID(1) | S_3F3_PRE_ENA(pre_ena) | next_acs->ib.size; return true; } @@ -752,7 +752,7 @@ radv_amdgpu_cs_execute_ib(struct ac_cmdbuf *_cs, struct radeon_winsys_bo *bo, ui return; assert(ib_va && ib_va % cs->ws->info.ip[cs->hw_ip].ib_alignment == 0); - assert(cs->hw_ip == AMD_IP_GFX && cdw <= ~C_3F2_IB_SIZE); + assert(cs->hw_ip == AMD_IP_GFX && cdw <= ~C_3F3_IB_SIZE); ac_emit_cp_indirect_buffer(&cs->base, ib_va, cdw, 0, predicate); } @@ -772,17 +772,18 @@ radv_amdgpu_cs_chain_dgc_ib(struct ac_cmdbuf *_cs, uint64_t va, uint32_t cdw, ui cs->ws->base.cs_execute_ib(_cs, NULL, va, cdw, predicate); } else { assert(va && va % cs->ws->info.ip[cs->hw_ip].ib_alignment == 0); - assert(cdw <= ~C_3F2_IB_SIZE); + assert(cdw <= ~C_3F3_IB_SIZE); /* Emit a WRITE_DATA packet to patch the DGC CS. */ const uint32_t chain_data[] = { PKT3(PKT3_INDIRECT_BUFFER, 2, 0), 0, 0, - S_3F2_CHAIN(1) | S_3F2_VALID(1), + S_3F3_CHAIN(1) | S_3F3_VALID(1), }; - ac_emit_cp_write_data(&cs->base, V_370_ME, V_370_MEM, trailer_va, ARRAY_SIZE(chain_data), chain_data, false); + ac_emit_cp_write_data(&cs->base, V_371_MICRO_ENGINE, V_371_MEMORY, trailer_va, ARRAY_SIZE(chain_data), chain_data, + false); /* Keep pointers for patching later. */ uint64_t *ib_va_ptr = (uint64_t *)(cs->base.buf + cs->base.cdw - 3); @@ -790,7 +791,7 @@ radv_amdgpu_cs_chain_dgc_ib(struct ac_cmdbuf *_cs, uint64_t va, uint32_t cdw, ui /* Writeback L2 because CP isn't coherent with L2 on GFX6-8. */ if (cs->ws->info.gfx_level == GFX8) { - ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_580_CP_ME, + ac_emit_cp_acquire_mem(&cs->base, GFX8, AMD_IP_COMPUTE, V_581B_CP_ME, S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); } @@ -801,7 +802,7 @@ radv_amdgpu_cs_chain_dgc_ib(struct ac_cmdbuf *_cs, uint64_t va, uint32_t cdw, ui _cs->buf[_cs->cdw - 4] = PKT3(PKT3_INDIRECT_BUFFER, 2, 0); _cs->buf[_cs->cdw - 3] = va; _cs->buf[_cs->cdw - 2] = va >> 32; - _cs->buf[_cs->cdw - 1] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | cdw; + _cs->buf[_cs->cdw - 1] = S_3F3_CHAIN(1) | S_3F3_VALID(1) | cdw; /* Allocate a new CS BO with initial size. */ const uint64_t ib_size = radv_amdgpu_cs_get_initial_size(cs->ws, cs->hw_ip); diff --git a/src/gallium/drivers/radeonsi/si_barrier.c b/src/gallium/drivers/radeonsi/si_barrier.c index 43a9c336477..b983886182c 100644 --- a/src/gallium/drivers/radeonsi/si_barrier.c +++ b/src/gallium/drivers/radeonsi/si_barrier.c @@ -26,7 +26,7 @@ static struct si_resource *si_get_wait_mem_scratch_bo(struct si_context *ctx, PIPE_RESOURCE_FLAG_ENCRYPTED, PIPE_USAGE_DEFAULT, 4, sscreen->info.tcc_cache_line_size); - si_cp_write_data(ctx, ctx->wait_mem_scratch_tmz, 0, 4, V_370_MEM, V_370_ME, + si_cp_write_data(ctx, ctx->wait_mem_scratch_tmz, 0, 4, V_371_MEMORY, V_371_MICRO_ENGINE, &ctx->wait_mem_number); } @@ -159,13 +159,13 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) assert(ctx->gfx_level < GFX12 || !(flags & SI_BARRIER_INV_L2_METADATA)); if (flags & SI_BARRIER_INV_ICACHE) - gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL); + gcr_cntl |= S_587_GLI_INV(V_587_GLI_ALL); if (flags & SI_BARRIER_INV_SMEM) - gcr_cntl |= S_586_GLK_INV(1); + gcr_cntl |= S_587_GLK_INV(1); if (flags & SI_BARRIER_INV_VMEM) - gcr_cntl |= S_586_GLV_INV(1); + gcr_cntl |= S_587_GLV_INV(1); if (ctx->gfx_level < GFX12 && flags & (SI_BARRIER_INV_SMEM | SI_BARRIER_INV_VMEM)) - gcr_cntl |= S_586_GL1_INV(1); + gcr_cntl |= S_587_GL1_INV(1); /* The L2 cache ops are: * - INV: - invalidate lines that reflect memory (were loaded from memory) @@ -178,14 +178,14 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) * GLM doesn't support WB alone. If WB is set, INV must be set too. */ if (flags & SI_BARRIER_INV_L2) - gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1); /* Writeback and invalidate everything in L2. */ + gcr_cntl |= S_587_GL2_INV(1) | S_587_GL2_WB(1); /* Writeback and invalidate everything in L2. */ else if (flags & SI_BARRIER_WB_L2) - gcr_cntl |= S_586_GL2_WB(1); + gcr_cntl |= S_587_GL2_WB(1); /* Invalidate the metadata cache. */ if (ctx->gfx_level < GFX12 && flags & (SI_BARRIER_INV_L2 | SI_BARRIER_WB_L2 | SI_BARRIER_INV_L2_METADATA)) - gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1); + gcr_cntl |= S_587_GLM_INV(1) | S_587_GLM_WB(1); /* Flush CB/DB. Note that this also idles all shaders, including compute shaders. */ if (flags & (SI_BARRIER_SYNC_AND_INV_CB | SI_BARRIER_SYNC_AND_INV_DB)) { @@ -215,15 +215,15 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) radeon_end(); /* First flush CB/DB, then L1/L2. */ - gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD); + gcr_cntl |= S_587_SEQ(V_587_SEQ_FORWARD); if (ctx->gfx_level >= GFX11) { - si_cp_release_mem_pws(ctx, cs, cb_db_event, gcr_cntl & C_586_GLI_INV); + si_cp_release_mem_pws(ctx, cs, cb_db_event, gcr_cntl & C_587_GLI_INV); /* Wait for the event and invalidate remaining caches if needed. */ si_cp_acquire_mem_pws(ctx, cs, cb_db_event, - flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME, - gcr_cntl & ~C_586_GLI_INV, /* keep only GLI_INV */ + flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME, + gcr_cntl & ~C_587_GLI_INV, /* keep only GLI_INV */ 0, flags); gcr_cntl = 0; /* all done */ @@ -241,28 +241,28 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) ctx->wait_mem_number++; /* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */ - unsigned glm_wb = G_586_GLM_WB(gcr_cntl); - unsigned glm_inv = G_586_GLM_INV(gcr_cntl); - unsigned glv_inv = G_586_GLV_INV(gcr_cntl); - unsigned gl1_inv = G_586_GL1_INV(gcr_cntl); - assert(G_586_GL2_US(gcr_cntl) == 0); - assert(G_586_GL2_RANGE(gcr_cntl) == 0); - assert(G_586_GL2_DISCARD(gcr_cntl) == 0); - unsigned gl2_inv = G_586_GL2_INV(gcr_cntl); - unsigned gl2_wb = G_586_GL2_WB(gcr_cntl); - unsigned gcr_seq = G_586_SEQ(gcr_cntl); + unsigned glm_wb = G_587_GLM_WB(gcr_cntl); + unsigned glm_inv = G_587_GLM_INV(gcr_cntl); + unsigned glv_inv = G_587_GLV_INV(gcr_cntl); + unsigned gl1_inv = G_587_GL1_INV(gcr_cntl); + assert(G_587_GL2_US(gcr_cntl) == 0); + assert(G_587_GL2_RANGE(gcr_cntl) == 0); + assert(G_587_GL2_DISCARD(gcr_cntl) == 0); + unsigned gl2_inv = G_587_GL2_INV(gcr_cntl); + unsigned gl2_wb = G_587_GL2_WB(gcr_cntl); + unsigned gcr_seq = G_587_SEQ(gcr_cntl); - gcr_cntl &= C_586_GLV_INV & C_586_GL2_INV & C_586_GL2_WB; /* keep SEQ */ + gcr_cntl &= C_587_GLV_INV & C_587_GL2_INV & C_587_GL2_WB; /* keep SEQ */ if (ctx->gfx_level < GFX12) - gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GL1_INV; + gcr_cntl &= C_587_GLM_WB & C_587_GLM_INV & C_587_GL1_INV; si_cp_release_mem(ctx, cs, cb_db_event, - (ctx->gfx_level >= GFX12 ? 0 : S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | - S_490_GL1_INV(gl1_inv)) | - S_490_GLV_INV(glv_inv) | - S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) | - S_490_SEQ(gcr_seq), + (ctx->gfx_level >= GFX12 ? 0 : S_491_GLM_WB(glm_wb) | S_491_GLM_INV(glm_inv) | + S_491_GL1_INV(gl1_inv)) | + S_491_GLV_INV(glv_inv) | + S_491_GL2_INV(gl2_inv) | S_491_GL2_WB(gl2_wb) | + S_491_SEQ(gcr_seq), EOP_DST_SEL_MEM, EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT, wait_mem_scratch, va, ctx->wait_mem_number, SI_NOT_QUERY); @@ -294,9 +294,9 @@ static void gfx10_emit_barrier(struct si_context *ctx, struct radeon_cmdbuf *cs) } /* Ignore fields that only modify the behavior of other fields. */ - if (gcr_cntl & C_586_GL2_RANGE & C_586_SEQ & (ctx->gfx_level >= GFX12 ? ~0 : C_586_GL1_RANGE)) { + if (gcr_cntl & C_587_GL2_RANGE & C_587_SEQ & (ctx->gfx_level >= GFX12 ? ~0 : C_587_GL1_RANGE)) { si_cp_acquire_mem(ctx, cs, gcr_cntl, - flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME); + flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME); } else if (flags & SI_BARRIER_PFP_SYNC_ME) { si_cp_pfp_sync_me(cs); } @@ -457,7 +457,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs) * * GFX6-GFX7 don't support L2 write-back. */ - unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_580_CP_PFP : V_580_CP_ME; + unsigned engine = flags & SI_BARRIER_PFP_SYNC_ME ? V_581B_CP_PFP : V_581B_CP_ME; if (flags & SI_BARRIER_INV_L2 || (sctx->gfx_level <= GFX7 && flags & SI_BARRIER_WB_L2)) { /* Invalidate L1 & L2. WB must be set on GFX8+ when TC_ACTION is set. */ @@ -485,7 +485,7 @@ static void gfx6_emit_barrier(struct si_context *sctx, struct radeon_cmdbuf *cs) S_0301F0_TC_NC_ACTION_ENA(1), /* If this is not the last ACQUIRE_MEM, flush in ME. * We only want to synchronize with PFP in the last ACQUIRE_MEM. */ - last_acquire_mem ? engine : V_580_CP_ME); + last_acquire_mem ? engine : V_581B_CP_ME); if (last_acquire_mem) flags &= ~SI_BARRIER_PFP_SYNC_ME; diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h index 6b3dcb2833a..35d0cf28c06 100644 --- a/src/gallium/drivers/radeonsi/si_build_pm4.h +++ b/src/gallium/drivers/radeonsi/si_build_pm4.h @@ -397,7 +397,7 @@ #define radeon_emit_alt_hiz_packets() do { \ radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); \ - radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_490_EVENT_INDEX(5)); \ + radeon_emit(S_491_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | S_491_EVENT_INDEX(5)); \ radeon_emit(0); /* DST_SEL, INT_SEL = no write confirm, DATA_SEL = no data */ \ radeon_emit(0); /* ADDRESS_LO */ \ radeon_emit(0); /* ADDRESS_HI */ \ diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 9f9abe8e512..26c9d230243 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -23,7 +23,7 @@ static inline unsigned cp_dma_max_byte_count(struct si_context *sctx) { unsigned max = sctx->gfx_level >= GFX11 ? 32767 : - sctx->gfx_level >= GFX9 ? S_415_BYTE_COUNT_GFX9(~0u) : S_415_BYTE_COUNT_GFX6(~0u); + sctx->gfx_level >= GFX9 ? S_506_BYTE_COUNT(~0u) : S_415_BYTE_COUNT(~0u); /* make it aligned for optimal performance */ return max & ~(SI_CPDMA_ALIGNMENT - 1); @@ -49,26 +49,26 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui } if (sctx->gfx_level >= GFX9) - command |= S_415_BYTE_COUNT_GFX9(size); + command |= S_506_BYTE_COUNT(size); else - command |= S_415_BYTE_COUNT_GFX6(size); + command |= S_415_BYTE_COUNT(size); /* Sync flags. */ if (flags & CP_DMA_SYNC) - header |= S_411_CP_SYNC(1); + header |= S_501_CP_SYNC(1); if (flags & CP_DMA_RAW_WAIT) - command |= S_415_RAW_WAIT(1); + command |= S_506_RAW_WAIT(1); /* Src and dst flags. */ /* GFX12: TC_L2 means MALL, which should always be set. */ if (sctx->screen->info.cp_dma_use_L2 || sctx->gfx_level == GFX12) - header |= S_501_DST_SEL(V_501_DST_ADDR_TC_L2); + header |= S_501_DST_SEL(V_501_DST_ADDR_USING_L2); if (flags & CP_DMA_CLEAR) { - header |= S_411_SRC_SEL(V_411_DATA); + header |= S_501_SRC_SEL(V_501_DATA); } else if (sctx->screen->info.cp_dma_use_L2 || sctx->gfx_level == GFX12) { - header |= S_501_SRC_SEL(V_501_SRC_ADDR_TC_L2); + header |= S_501_SRC_SEL(V_501_SRC_ADDR_USING_L2); } radeon_begin(cs); @@ -82,7 +82,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui radeon_emit(dst_va >> 32); /* DST_ADDR_HI [31:0] */ radeon_emit(command); } else { - header |= S_411_SRC_ADDR_HI(src_va >> 32); + header |= S_412_SRC_ADDR_HI(src_va >> 32); radeon_emit(PKT3(PKT3_CP_DMA, 4, 0)); radeon_emit(src_va); /* SRC_ADDR_LO [31:0] */ @@ -313,8 +313,8 @@ void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, unsigned assert(offset % 4 == 0); assert(size % 4 == 0); - if (sctx->gfx_level == GFX6 && dst_sel == V_370_MEM) - dst_sel = V_370_MEM_GRBM; + if (sctx->gfx_level == GFX6 && dst_sel == V_371_MEMORY) + dst_sel = V_371_MEM_GRBM; radeon_add_to_buffer_list(sctx, cs, buf, RADEON_USAGE_WRITE | RADEON_PRIO_CP_DMA); uint64_t va = buf->gpu_address + offset; diff --git a/src/gallium/drivers/radeonsi/si_cp_utils.c b/src/gallium/drivers/radeonsi/si_cp_utils.c index 6036e764e3a..f5f0a4d8444 100644 --- a/src/gallium/drivers/radeonsi/si_cp_utils.c +++ b/src/gallium/drivers/radeonsi/si_cp_utils.c @@ -59,7 +59,7 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign if (!compute_ib) sctx->context_roll = true; - if (engine == V_580_CP_PFP) + if (engine == V_581B_CP_PFP) si_cp_pfp_sync_me(cs); } } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 771050d0ca2..1b3825b68da 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1819,8 +1819,8 @@ static void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc data = desc->list + desc_slot_offset; va = desc->gpu_address + desc_slot_offset * 4; - si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, num_dwords * 4, V_370_TC_L2, - V_370_ME, data); + si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, num_dwords * 4, V_371_TC_L2, + V_371_MICRO_ENGINE, data); } static void si_upload_bindless_descriptors(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index c14ccddd555..cd00dbf3e24 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -218,7 +218,7 @@ static void si_fine_fence_set(struct si_context *ctx, struct si_fine_fence *fine if (flags & PIPE_FLUSH_TOP_OF_PIPE) { uint32_t value = 0x80000000; - si_cp_write_data(ctx, fine->buf, fine->offset, 4, V_370_MEM, V_370_PFP, &value); + si_cp_write_data(ctx, fine->buf, fine->offset, 4, V_371_MEMORY, V_371_PREFETCH_PARSER, &value); } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) { uint64_t fence_va = fine->buf->gpu_address + fine->offset; diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index cec992ffd16..9b76d89684a 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -573,7 +573,7 @@ void si_trace_emit(struct si_context *sctx) struct radeon_cmdbuf *cs = &sctx->gfx_cs; uint32_t trace_id = ++sctx->current_saved_cs->trace_id; - si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, 0, 4, V_370_MEM, V_370_ME, &trace_id); + si_cp_write_data(sctx, sctx->current_saved_cs->trace_buf, 0, 4, V_371_MEMORY, V_371_MICRO_ENGINE, &trace_id); ac_emit_cp_nop(&cs->current, AC_ENCODE_TRACE_POINT(trace_id)); diff --git a/src/gallium/drivers/radeonsi/si_mesh_shader.c b/src/gallium/drivers/radeonsi/si_mesh_shader.c index 10378168d31..360ebb46e08 100644 --- a/src/gallium/drivers/radeonsi/si_mesh_shader.c +++ b/src/gallium/drivers/radeonsi/si_mesh_shader.c @@ -785,7 +785,7 @@ static void si_emit_task_wait_packets(struct si_context *sctx) if (sctx->task_wait_count == sctx->last_task_wait_count) return; - si_cp_write_data(sctx, sctx->task_wait_buf, 0, 4, V_370_MEM, V_370_ME, + si_cp_write_data(sctx, sctx->task_wait_buf, 0, 4, V_371_MEMORY, V_371_MICRO_ENGINE, &sctx->task_wait_count); si_cp_wait_mem(sctx, sctx->gfx_cs.gang_cs, sctx->task_wait_buf->gpu_address, diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index a7f5fc28b32..06c33996c78 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -61,7 +61,7 @@ static void si_pc_wait_idle(struct si_context *sctx) radeon_emit(EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4))); radeon_end(); - si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_580_CP_PFP); + si_cp_acquire_mem(sctx, cs, coher_cntl_stall_all, V_581B_CP_PFP); } static void si_pc_emit_instance(struct si_context *sctx, int se, int instance) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 6dc17da7949..e2b07b6ca34 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -858,7 +858,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign goto fail; } - si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4, V_370_MEM, V_370_ME, + si_cp_write_data(sctx, sctx->wait_mem_scratch, 0, 4, V_371_MEMORY, V_371_MICRO_ENGINE, &sctx->wait_mem_number); } diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index ebe62e8a3de..66115482779 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -844,7 +844,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx, struct si_query_h /* Clear the emulated counter end value. We don't clear start because it's unused. */ va += si_query_pipestat_end_dw_offset(sctx->screen, query->index) * 4; - ac_emit_cp_write_data_imm(&cs->current, V_370_PFP, va, 0); + ac_emit_cp_write_data_imm(&cs->current, V_371_PREFETCH_PARSER, va, 0); sctx->num_pipeline_stat_emulated_queries++; } else { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 3cccfba785a..8b9de9431d5 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -643,18 +643,18 @@ static void si_cp_dma_prefetch_inline(struct radeon_cmdbuf *cs, uint64_t address */ assert(size % SI_CPDMA_ALIGNMENT == 0); assert(address % SI_CPDMA_ALIGNMENT == 0); - assert(size < S_415_BYTE_COUNT_GFX6(~0u)); + assert(size < S_415_BYTE_COUNT(~0u)); assert(address || size == 0); - uint32_t header = S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); - uint32_t command = S_415_BYTE_COUNT_GFX6(size); + uint32_t header = S_501_SRC_SEL(V_501_SRC_ADDR_USING_L2); + uint32_t command = S_415_BYTE_COUNT(size); if (GFX_VERSION >= GFX9) { - command |= S_415_DISABLE_WR_CONFIRM_GFX9(1); - header |= S_411_DST_SEL(V_411_NOWHERE); + command |= S_506_DISABLE_WR_CONFIRM(1); + header |= S_501_DST_SEL(V_501_DST_NOWHERE); } else { - command |= S_415_DISABLE_WR_CONFIRM_GFX6(1); - header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); + command |= S_415_DISABLE_WR_CONFIRM(1); + header |= S_501_DST_SEL(V_501_DST_ADDR_USING_L2); } radeon_begin(cs); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 9c48154713c..735b8c49f58 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4982,7 +4982,7 @@ static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index) * in memory. */ si_cp_release_acquire_mem_pws(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, - V_580_CP_ME, 0); + V_581B_CP_ME, 0); uint64_t attr_address = sctx->ws->cs_is_secure(&sctx->gfx_cs) ? sscreen->attribute_pos_prim_ring_tmz->gpu_address : diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp index 552c2380d1f..3e91911c834 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp @@ -810,11 +810,11 @@ static bool amdgpu_get_new_ib(struct amdgpu_winsys *aws, static void amdgpu_set_ib_size(struct radeon_cmdbuf *rcs, struct amdgpu_ib *ib) { if (ib->is_chained_ib) { - *ib->ptr_ib_size = rcs->current.cdw | S_3F2_CHAIN(1) | S_3F2_VALID(1); + *ib->ptr_ib_size = rcs->current.cdw | S_3F3_CHAIN(1) | S_3F3_VALID(1); struct amdgpu_cs *acs = amdgpu_cs(rcs); if (!rcs->gang && acs->preamble_ib_bo) - *ib->ptr_ib_size |= S_3F2_PRE_ENA(1); + *ib->ptr_ib_size |= S_3F3_PRE_ENA(1); } else { *ib->ptr_ib_size = rcs->current.cdw; } @@ -1567,10 +1567,10 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws, /* add release mem for user fence */ amdgpu_pkt_add_dw(PKT3(PKT3_RELEASE_MEM, 6, 0)); - amdgpu_pkt_add_dw(S_490_EVENT_TYPE(V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT) | - S_490_EVENT_INDEX(5) | - (aws->info.gfx_level >= GFX12 ? 0 : S_490_GLM_WB(1) | S_490_GLM_INV(1)) | - S_490_GL2_WB(1) | S_490_SEQ(1) | S_490_CACHE_POLICY(3)); + amdgpu_pkt_add_dw(S_491_EVENT_TYPE(V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT) | + S_491_EVENT_INDEX(5) | + (aws->info.gfx_level >= GFX12 ? 0 : S_491_GLM_WB(1) | S_491_GLM_INV(1)) | + S_491_GL2_WB(1) | S_491_SEQ(1) | S_491_CACHE_POLICY(3)); amdgpu_pkt_add_dw(S_030358_DATA_SEL(2)); amdgpu_pkt_add_dw(userq->user_fence_va); amdgpu_pkt_add_dw(userq->user_fence_va >> 32);