Merge branch 'ac-new-cp-packet-parser' into 'main'

amd: add a new auto-generated CP packet parser using the new packet definitions

See merge request mesa/mesa!40183
This commit is contained in:
Marek Olšák 2026-03-11 04:54:49 +00:00
commit 01153d6c8d
20 changed files with 615 additions and 445 deletions

View file

@ -538,8 +538,8 @@ ac_emit_cp_atomic_mem(struct ac_cmdbuf *cs, uint32_t atomic_op,
{
ac_cmdbuf_begin(cs);
ac_cmdbuf_emit(PKT3(PKT3_ATOMIC_MEM, 7, 0));
ac_cmdbuf_emit(ATOMIC_OP(atomic_op) |
ATOMIC_COMMAND(atomic_cmd));
ac_cmdbuf_emit(S_1E_1_ATOMIC(atomic_op) |
S_1E_1_COMMAND(atomic_cmd));
ac_cmdbuf_emit(va); /* addr lo */
ac_cmdbuf_emit(va >> 32); /* addr hi */
ac_cmdbuf_emit(data); /* data lo */

View file

@ -102,8 +102,13 @@ struct ac_ib_parser {
unsigned cur_dw;
};
void ac_print_data_dword(FILE *file, uint32_t value, const char *comment);
void ac_print_named_value(FILE *file, const char *name, uint32_t value, int bits);
void ac_print_string_value(FILE *file, const char *name, const char *value);
void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, enum radeon_family family,
unsigned offset, uint32_t value, uint32_t field_mask);
uint32_t ac_ib_get(struct ac_ib_parser *ib);
void ac_ib_handle_address(struct ac_ib_parser *ib, uint32_t addr_lo, uint32_t addr_hi, uint32_t size);
void ac_parse_ib_chunk(struct ac_ib_parser *ib);
void ac_parse_ib(struct ac_ib_parser *ib, const char *name);

View file

@ -5,6 +5,8 @@
*/
#include "ac_debug.h"
#include "amd_cp_print_packet_gfx11.h"
#include "amd_cp_print_packet_gfx12.h"
#include "sid.h"
#define SID_TABLE_IMPLEMENTATION
#include "sid_tables.h"
@ -97,6 +99,21 @@ static void print_string_value(FILE *file, const char *name, const char *value)
fprintf(file, "%s\n", value);
}
/* Exported wrapper around the file-local print_data_dword() helper so code
 * outside this file can print one raw data dword with a trailing comment
 * (presumably for the auto-generated CP packet printers — NOTE(review):
 * confirm against the generated amd_cp_print_packet_* sources). */
void ac_print_data_dword(FILE *file, uint32_t value, const char *comment)
{
print_data_dword(file, value, comment);
}
/* Exported wrapper around the file-local print_named_value() helper.
 * 'bits' is the value's bit width as passed by existing callers in this
 * file (e.g. 16 or 32). */
void ac_print_named_value(FILE *file, const char *name, uint32_t value, int bits)
{
print_named_value(file, name, value, bits);
}
/* Exported wrapper around the static print_string_value() helper, which
 * prints a named string field (see its definition earlier in this file). */
void ac_print_string_value(FILE *file, const char *name, const char *value)
{
print_string_value(file, name, value);
}
void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, enum radeon_family family,
unsigned offset, uint32_t value, uint32_t field_mask)
{
@ -140,7 +157,7 @@ void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, enum radeon_family fa
O_COLOR_RESET, value);
}
static uint32_t ac_ib_get(struct ac_ib_parser *ib)
uint32_t ac_ib_get(struct ac_ib_parser *ib)
{
uint32_t v = 0;
@ -228,17 +245,8 @@ static void ac_parse_set_reg_pairs_packed_packet(FILE *f, unsigned count, unsign
#define AC_ADDR_SIZE_NOT_MEMORY 0xFFFFFFFF
static void print_addr(struct ac_ib_parser *ib, const char *name, uint64_t addr, uint32_t size)
static const char *check_address(struct ac_ib_parser *ib, uint64_t addr, uint32_t size)
{
FILE *f = ib->f;
print_spaces(f, INDENT_PKT);
fprintf(f, "%s%s%s <- ",
O_COLOR_YELLOW, name,
O_COLOR_RESET);
fprintf(f, "0x%llx", (unsigned long long)addr);
if (ib->addr_callback && size != AC_ADDR_SIZE_NOT_MEMORY) {
struct ac_addr_info addr_info;
ib->addr_callback(ib->addr_callback_data, addr, &addr_info);
@ -250,16 +258,56 @@ static void print_addr(struct ac_ib_parser *ib, const char *name, uint64_t addr,
uint32_t invalid_count = !addr_info.valid + !addr_info2.valid;
if (addr_info.use_after_free && addr_info2.use_after_free)
fprintf(f, " used after free");
return " used after free";
else if (invalid_count == 2)
fprintf(f, " invalid");
return " invalid";
else if (invalid_count == 1)
fprintf(f, " out of bounds");
return " out of bounds";
}
return NULL;
}
/* Print a "(FULL ADDRESS)" line for an address split across two dwords and
 * append the validation message from check_address(), if any. */
void ac_ib_handle_address(struct ac_ib_parser *ib, uint32_t addr_lo, uint32_t addr_hi, uint32_t size)
{
   FILE *f = ib->f;
   uint64_t address = ((uint64_t)addr_hi << 32) | addr_lo;
   const char *msg = check_address(ib, address, size);

   print_spaces(f, INDENT_PKT);
   fprintf(f, "%s(FULL ADDRESS)%s <- 0x%"PRIx64"%s\n", O_COLOR_YELLOW, O_COLOR_RESET,
           address, msg ? msg : "");
}
/* Print a named address value; the address is printed first, then the
 * validation message from check_address() (if any) is appended. */
static void print_addr(struct ac_ib_parser *ib, const char *name, uint64_t addr, uint32_t size)
{
   FILE *f = ib->f;

   print_spaces(f, INDENT_PKT);
   fprintf(f, "%s%s%s <- 0x%llx", O_COLOR_YELLOW, name, O_COLOR_RESET,
           (unsigned long long)addr);

   const char *msg = check_address(ib, addr, size);
   if (msg)
      fputs(msg, f);
   fputs("\n", f);
}
/* Dispatch to the auto-generated CP packet printer matching the chip's
 * gfx level: GFX12+ uses the gfx12 tables, everything older the gfx11 ones. */
static void ac_cp_print_packet_generated(struct ac_ib_parser *ib, unsigned opcode, unsigned count)
{
   if (ib->gfx_level < GFX12) {
      amd_cp_print_packet_gfx11(ib, opcode, count);
      return;
   }
   amd_cp_print_packet_gfx12(ib, opcode, count);
}
static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
int *current_trace_id)
{
@ -270,7 +318,6 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
const char *predicated = PKT3_PREDICATE(header) ? "(predicated)" : "";
const char *reset_filter_cam = PKT3_RESET_FILTER_CAM_G(header) ? "(reset_filter_cam)" : "";
int i;
unsigned tmp;
/* Print the name first. */
for (i = 0; i < packet3_table_size; i++)
@ -332,39 +379,8 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
case PKT3_SET_SH_REG_PAIRS_PACKED_N:
ac_parse_set_reg_pairs_packed_packet(f, count, SI_SH_REG_OFFSET, ib);
break;
case PKT3_ACQUIRE_MEM:
if (ib->gfx_level >= GFX11) {
if (G_585_PWS_ENA(ib->ib[ib->cur_dw + 5])) {
ac_dump_reg(f, ib->gfx_level, ib->family, R_580_ACQUIRE_MEM_PWS_2, ac_ib_get(ib), ~0);
print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
ac_dump_reg(f, ib->gfx_level, ib->family, R_585_ACQUIRE_MEM_PWS_7, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
} else {
print_string_value(f, "ENGINE_SEL", ac_ib_get(ib) & 0x80000000 ? "ME" : "PFP");
print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
}
} else {
tmp = ac_ib_get(ib);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, tmp, 0x7fffffff);
print_string_value(f, "ENGINE_SEL", tmp & 0x80000000 ? "ME" : "PFP");
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
if (ib->gfx_level >= GFX10)
ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
}
break;
case PKT3_SURFACE_SYNC:
/* GFX6-8 */
if (ib->gfx_level >= GFX7) {
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
@ -376,18 +392,8 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
}
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
break;
case PKT3_EVENT_WRITE: {
uint32_t event_dw = ac_ib_get(ib);
ac_dump_reg(f, ib->gfx_level, ib->family, R_028A90_VGT_EVENT_INITIATOR, event_dw,
S_028A90_EVENT_TYPE(~0));
print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
print_named_value(f, "INV_L2", (event_dw >> 20) & 0x1, 1);
if (count > 0)
print_addr(ib, "ADDR", ac_ib_get64(ib), 0);
break;
}
case PKT3_EVENT_WRITE_EOP: {
/* GFX6-8 */
uint32_t event_dw = ac_ib_get(ib);
ac_dump_reg(f, ib->gfx_level, ib->family, R_028A90_VGT_EVENT_INITIATOR, event_dw,
S_028A90_EVENT_TYPE(~0));
@ -421,10 +427,13 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
break;
}
case PKT3_RELEASE_MEM: {
uint32_t event_dw = ac_ib_get(ib);
if (ib->gfx_level >= GFX10) {
ac_dump_reg(f, ib->gfx_level, ib->family, R_490_RELEASE_MEM_OP, event_dw, ~0u);
} else {
ac_cp_print_packet_generated(ib, op, count);
break;
}
/* GFX6-9 */
uint32_t event_dw = ac_ib_get(ib);
{
ac_dump_reg(f, ib->gfx_level, ib->family, R_028A90_VGT_EVENT_INITIATOR, event_dw,
S_028A90_EVENT_TYPE(~0));
print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
@ -448,67 +457,12 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
print_named_value(f, "CTXID", ac_ib_get(ib), 32);
break;
}
case PKT3_WAIT_REG_MEM:
print_named_value(f, "OP", ac_ib_get(ib), 32);
print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32);
print_named_value(f, "REF", ac_ib_get(ib), 32);
print_named_value(f, "MASK", ac_ib_get(ib), 32);
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
break;
case PKT3_DRAW_INDEX_AUTO:
ac_dump_reg(f, ib->gfx_level, ib->family, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
break;
case PKT3_DRAW_INDEX_2:
ac_dump_reg(f, ib->gfx_level, ib->family, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0);
print_addr(ib, "INDEX_ADDR", ac_ib_get64(ib), 0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
break;
case PKT3_DRAW_INDIRECT:
case PKT3_DRAW_INDEX_INDIRECT:
print_named_value(f, "OFFSET", ac_ib_get(ib), 32);
print_named_value(f, "VERTEX_OFFSET_REG", ac_ib_get(ib), 32);
print_named_value(f, "START_INSTANCE_REG", ac_ib_get(ib), 32);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
break;
case PKT3_DRAW_INDIRECT_MULTI:
case PKT3_DRAW_INDEX_INDIRECT_MULTI:
print_named_value(f, "OFFSET", ac_ib_get(ib), 32);
print_named_value(f, "VERTEX_OFFSET_REG", ac_ib_get(ib), 32);
print_named_value(f, "START_INSTANCE_REG", ac_ib_get(ib), 32);
tmp = ac_ib_get(ib);
print_named_value(f, "DRAW_ID_REG", tmp & 0xFFFF, 16);
print_named_value(f, "DRAW_ID_ENABLE", tmp >> 31, 1);
print_named_value(f, "COUNT_INDIRECT_ENABLE", (tmp >> 30) & 1, 1);
print_named_value(f, "DRAW_COUNT", ac_ib_get(ib), 32);
print_addr(ib, "COUNT_ADDR", ac_ib_get64(ib), 0);
print_named_value(f, "STRIDE", ac_ib_get(ib), 32);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
break;
case PKT3_INDEX_BASE:
print_addr(ib, "ADDR", ac_ib_get64(ib), 0);
break;
case PKT3_INDEX_TYPE:
/* GFX6-8 */
ac_dump_reg(f, ib->gfx_level, ib->family, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0);
break;
case PKT3_NUM_INSTANCES:
ac_dump_reg(f, ib->gfx_level, ib->family, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0);
break;
case PKT3_WRITE_DATA: {
uint32_t control = ac_ib_get(ib);
ac_dump_reg(f, ib->gfx_level, ib->family, R_370_CONTROL, control, ~0);
uint32_t dst_sel = G_370_DST_SEL(control);
uint64_t addr = ac_ib_get64(ib);
uint32_t dword_count = first_dw + count + 1 - ib->cur_dw;
bool writes_memory = dst_sel == V_370_MEM_GRBM || dst_sel == V_370_TC_L2 || dst_sel == V_370_MEM;
print_addr(ib, "DST_ADDR", addr, writes_memory ? dword_count * 4 : AC_ADDR_SIZE_NOT_MEMORY);
for (uint32_t i = 0; i < dword_count; i++)
print_data_dword(f, ac_ib_get(ib), "data");
break;
}
case PKT3_CP_DMA:
/* GFX6 */
ac_dump_reg(f, ib->gfx_level, ib->family, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
@ -516,6 +470,11 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
ac_dump_reg(f, ib->gfx_level, ib->family, R_415_COMMAND, ac_ib_get(ib), ~0);
break;
case PKT3_DMA_DATA: {
if (ib->gfx_level >= GFX9) {
ac_cp_print_packet_generated(ib, op, count);
break;
}
/* GFX7-8 */
uint32_t header = ac_ib_get(ib);
ac_dump_reg(f, ib->gfx_level, ib->family, R_501_DMA_DATA_WORD0, header, ~0);
@ -523,8 +482,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
uint64_t dst_addr = ac_ib_get64(ib);
uint32_t command = ac_ib_get(ib);
uint32_t size = ib->gfx_level >= GFX9 ? G_415_BYTE_COUNT_GFX9(command)
: G_415_BYTE_COUNT_GFX6(command);
uint32_t size = G_415_BYTE_COUNT_GFX6(command);
uint32_t src_sel = G_501_SRC_SEL(header);
bool src_mem = (src_sel == V_501_SRC_ADDR && G_415_SAS(command) == V_415_MEMORY) ||
@ -540,7 +498,6 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
break;
}
case PKT3_INDIRECT_BUFFER_SI:
case PKT3_INDIRECT_BUFFER_CONST:
case PKT3_INDIRECT_BUFFER: {
uint32_t base_lo_dw = ac_ib_get(ib);
ac_dump_reg(f, ib->gfx_level, ib->family, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
@ -585,11 +542,6 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
fprintf(f, "\n\035<------------------- nested end -------------------\n");
break;
}
case PKT3_CLEAR_STATE:
case PKT3_INCREMENT_DE_COUNTER:
case PKT3_PFP_SYNC_ME:
print_data_dword(f, ac_ib_get(ib), "reserved");
break;
case PKT3_NOP:
if (header == PKT3_NOP_PAD) {
count = -1; /* One dword NOP. */
@ -626,128 +578,8 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
print_data_dword(f, ac_ib_get(ib), "unused");
}
break;
case PKT3_DISPATCH_DIRECT:
case PKT3_DISPATCH_DIRECT_INTERLEAVED:
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B804_COMPUTE_DIM_X, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B808_COMPUTE_DIM_Y, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B80C_COMPUTE_DIM_Z, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR,
ac_ib_get(ib), ~0);
break;
case PKT3_DISPATCH_INDIRECT:
case PKT3_DISPATCH_INDIRECT_INTERLEAVED:
if (count > 1)
print_addr(ib, "ADDR", ac_ib_get64(ib), 12);
else
print_named_value(f, "DATA_OFFSET", ac_ib_get(ib), 32);
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR,
ac_ib_get(ib), ~0);
break;
case PKT3_SET_BASE:
tmp = ac_ib_get(ib);
print_string_value(f, "BASE_INDEX", tmp == 1 ? "INDIRECT_BASE" : COLOR_RED "UNKNOWN" COLOR_RESET);
print_addr(ib, "ADDR", ac_ib_get64(ib), 0);
break;
case PKT3_PRIME_UTCL2:
tmp = ac_ib_get(ib);
print_named_value(f, "CACHE_PERM[rwx]", tmp & 0x7, 3);
print_string_value(f, "PRIME_MODE", tmp & 0x8 ? "WAIT_FOR_XACK" : "DONT_WAIT_FOR_XACK");
print_named_value(f, "ENGINE_SEL", tmp >> 30, 2);
print_addr(ib, "ADDR", ac_ib_get64(ib), 0);
print_named_value(f, "REQUESTED_PAGES", ac_ib_get(ib), 14);
break;
case PKT3_ATOMIC_MEM:
tmp = ac_ib_get(ib);
print_named_value(f, "ATOMIC", tmp & 0x7f, 7);
print_named_value(f, "COMMAND", (tmp >> 8) & 0xf, 4);
print_named_value(f, "CACHE_POLICY", (tmp >> 25) & 0x3, 2);
print_named_value(f, "ENGINE_SEL", tmp >> 30, 2);
print_addr(ib, "ADDR", ac_ib_get64(ib), 8);
print_named_value(f, "SRC_DATA_LO", ac_ib_get(ib), 32);
print_named_value(f, "SRC_DATA_HI", ac_ib_get(ib), 32);
print_named_value(f, "CMP_DATA_LO", ac_ib_get(ib), 32);
print_named_value(f, "CMP_DATA_HI", ac_ib_get(ib), 32);
print_named_value(f, "LOOP_INTERVAL", ac_ib_get(ib) & 0x1fff, 13);
break;
case PKT3_INDEX_BUFFER_SIZE:
print_named_value(f, "COUNT", ac_ib_get(ib), 32);
break;
case PKT3_COND_EXEC: {
uint32_t size = ac_ib_get(ib) * 4;
print_addr(ib, "ADDR", ac_ib_get64(ib), size);
print_named_value(f, "SIZE", size, 32);
break;
}
case PKT3_DISPATCH_TASKMESH_GFX:
tmp = ac_ib_get(ib);
print_named_value(f, "RING_ENTRY_REG", (tmp >> 16) & 0xffff, 16);
print_named_value(f, "XYZ_DIM_REG", (tmp & 0xffff), 16);
tmp = ac_ib_get(ib);
print_named_value(f, "THREAD_TRACE_MARKER_ENABLE", (tmp >> 31) & 0x1, 1);
if (ib->gfx_level >= GFX11) {
print_named_value(f, "XYZ_DIM_ENABLE", (tmp >> 30) & 0x1, 1);
print_named_value(f, "MODE1_ENABLE", (tmp >> 29) & 0x1, 1);
print_named_value(f, "LINEAR_DISPATCH_ENABLED", (tmp >> 28) & 0x1, 1);
}
print_named_value(f, "DI_SRC_SEL_AUTO_INDEX", ac_ib_get(ib), ~0);
break;
case PKT3_DISPATCH_TASKMESH_DIRECT_ACE:
print_named_value(f, "X_DIM", ac_ib_get(ib), ~0);
print_named_value(f, "Y_DIM", ac_ib_get(ib), ~0);
print_named_value(f, "Z_DIM", ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR,
ac_ib_get(ib), ~0);
print_named_value(f, "RING_ENTRY_REG", ac_ib_get(ib), 16);
break;
case PKT3_DISPATCH_MESH_DIRECT:
print_named_value(f, "X_DIM", ac_ib_get(ib), ~0);
print_named_value(f, "Y_DIM", ac_ib_get(ib), ~0);
print_named_value(f, "Z_DIM", ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR,
ac_ib_get(ib), ~0);
break;
case PKT3_DISPATCH_MESH_INDIRECT_MULTI:
print_named_value(f, "DATA_OFFSET", ac_ib_get(ib), 32);
tmp = ac_ib_get(ib);
print_named_value(f, "DRAW_INDEX_LOC", (tmp >> 16) & 0xffff, 16);
print_named_value(f, "XYZ_DIM_LOC", tmp & 0xffff, 16);
tmp = ac_ib_get(ib);
print_named_value(f, "DRAW_INDEX_ENABLE", tmp >> 31, 1);
print_named_value(f, "COUNT_INDIRECT_ENABLE", (tmp >> 30) & 1, 1);
print_named_value(f, "THREAD_TRACE_MARKER_ENABLE", (tmp >> 29) & 1, 1);
if (ib->gfx_level >= GFX11) {
print_named_value(f, "XYZ_DIM_ENABLE", (tmp >> 28) & 1, 1);
print_named_value(f, "MODE1_ENABLE", (tmp >> 27) & 1, 1);
} else {
print_named_value(f, "USE_VGPRS", (tmp >> 28) & 1, 1);
}
print_named_value(f, "COUNT", ac_ib_get(ib), 32);
print_addr(ib, "COUNT_ADDR", ac_ib_get64(ib), 0);
print_named_value(f, "STRIDE", ac_ib_get(ib), 32);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR,
ac_ib_get(ib), ~0);
break;
case PKT3_DISPATCH_TASK_STATE_INIT:
print_addr(ib, "CONTROL_BUF_ADDR", ac_ib_get64(ib), 0);
break;
case PKT3_DISPATCH_TASKMESH_INDIRECT_MULTI_ACE:
print_addr(ib, "DATA_ADDR", ac_ib_get64(ib), 0);
tmp = ac_ib_get(ib);
print_named_value(f, "RING_ENTRY_LOC", tmp & 0xffff, 16);
tmp = ac_ib_get(ib);
print_named_value(f, "DRAW_INDEX_LOC", (tmp >> 16) & 0xffff, 16);
print_named_value(f, "XYZ_DIM_ENABLE", (tmp >> 3) & 1, 1);
print_named_value(f, "DRAW_INDEX_ENABLE", (tmp >> 2), 1);
print_named_value(f, "COUNT_INDIRECT_ENABLE", (tmp >> 1) & 1, 1);
print_named_value(f, "THREAD_TRACE_MARKER_ENABLE", tmp & 1, 1);
tmp = ac_ib_get(ib);
print_named_value(f, "XYZ_DIM_LOC", tmp & 0xffff, 16);
print_named_value(f, "COUNT", ac_ib_get(ib), 32);
print_addr(ib, "COUNT_ADDR", ac_ib_get64(ib), 0);
print_named_value(f, "STRIDE", ac_ib_get(ib), 32);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR,
ac_ib_get(ib), ~0);
default:
ac_cp_print_packet_generated(ib, op, count);
break;
}

View file

@ -3069,18 +3069,18 @@ struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *i
ac_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(pm4,
CC0_UPDATE_LOAD_ENABLES(1) |
CC0_LOAD_PER_CONTEXT_STATE(1) |
CC0_LOAD_CS_SH_REGS(1) |
CC0_LOAD_GFX_SH_REGS(1) |
CC0_LOAD_GLOBAL_UCONFIG(1));
S_28_1_UPDATE_LOAD_ENABLES(1) |
S_28_1_LOAD_PER_CONTEXT_STATE(1) |
S_28_1_LOAD_CS_SH_REGS(1) |
S_28_1_LOAD_GFX_SH_REGS(1) |
S_28_1_LOAD_GLOBAL_UCONFIG(1));
ac_pm4_cmd_add(pm4,
CC1_UPDATE_SHADOW_ENABLES(1) |
CC1_SHADOW_PER_CONTEXT_STATE(1) |
CC1_SHADOW_CS_SH_REGS(1) |
CC1_SHADOW_GFX_SH_REGS(1) |
CC1_SHADOW_GLOBAL_UCONFIG(1) |
CC1_SHADOW_GLOBAL_CONFIG(1));
S_28_2_UPDATE_SHADOW_ENABLES(1) |
S_28_2_SHADOW_PER_CONTEXT_STATE(1) |
S_28_2_SHADOW_CS_SH_REGS(1) |
S_28_2_SHADOW_GFX_SH_REGS(1) |
S_28_2_SHADOW_GLOBAL_UCONFIG(1) |
S_28_2_SHADOW_GLOBAL_CONFIG(1));
for (unsigned i = 0; i < SI_NUM_REG_RANGES; i++)
ac_build_load_reg(info, pm4, i, gpu_address);

View file

@ -594,7 +594,7 @@ ac_sqtt_copy_info_regs(const struct radeon_info *info, struct ac_pm4_state *pm4,
uint32_t init_wptr_value = shifted_data_va & 0x1fffffff;
ac_pm4_cmd_add(pm4, PKT3(PKT3_ATOMIC_MEM, 7, 0));
ac_pm4_cmd_add(pm4, ATOMIC_OP(TC_OP_ATOMIC_SUB_RTN_32));
ac_pm4_cmd_add(pm4, S_1E_1_ATOMIC(V_1E_1_GL2_OP_ATOMIC_SUB_RTN_32));
ac_pm4_cmd_add(pm4, info_va); /* addr lo */
ac_pm4_cmd_add(pm4, info_va >> 32); /* addr hi */
ac_pm4_cmd_add(pm4, init_wptr_value); /* data lo */

View file

@ -33,13 +33,34 @@ amd_packet_files = [
]
amd_packet_headers = []
amd_ib_parsers = []
foreach gen : ['gfx11', 'gfx12']
amd_packet_headers += custom_target(
'amd_cp_packets_' + gen + '.h',
input : ['../packets/parse_cp_pm4_table_data_json.py', amd_packet_files],
output : 'amd_cp_packets_' + gen + '.h',
command : [prog_python, '@INPUT@', gen],
command : [prog_python, '@INPUT@', gen, 'packets_h'],
capture : true,
)
amd_ib_parsers += custom_target(
'amd_cp_print_packet_' + gen + '.c',
input : ['../packets/parse_cp_pm4_table_data_json.py',
'../packets/cp_pm4_table_data_' + gen + '.json',
'../packets/pm4_it_opcodes_' + gen + '.h'],
output : 'amd_cp_print_packet_' + gen + '.c',
command : [prog_python, '@INPUT@', gen, 'print_c'],
capture : true,
)
amd_ib_parsers += custom_target(
'amd_cp_print_packet_' + gen + '.h',
input : ['../packets/parse_cp_pm4_table_data_json.py',
'../packets/cp_pm4_table_data_' + gen + '.json',
'../packets/pm4_it_opcodes_' + gen + '.h'],
output : 'amd_cp_print_packet_' + gen + '.h',
command : [prog_python, '@INPUT@', gen, 'print_h'],
capture : true,
)
endforeach
@ -161,7 +182,7 @@ amd_common_files = files(
'nir/ac_nir_prerast_utils.c',
'nir/ac_nir_surface.c',
'nir/ac_nir_surface.h',
)
) + amd_ib_parsers
if not with_platform_windows
amd_common_files += files(

View file

@ -36,10 +36,6 @@
#define SI_SHADOWED_REG_BUFFER_SIZE \
(SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + SI_UCONFIG_REG_SPACE_SIZE)
/* All registers defined in this packet section don't exist and the only
* purpose of these definitions is to define packet encoding that
* the IB parser understands, and also to have an accurate documentation.
*/
#define PKT3_NOP 0x10
#define PKT3_SET_BASE 0x11
#define PKT3_CLEAR_STATE 0x12
@ -47,58 +43,27 @@
#define PKT3_DISPATCH_DIRECT 0x15
#define PKT3_DISPATCH_INDIRECT 0x16
#define PKT3_ATOMIC_MEM 0x1E
#define ATOMIC_OP(x) ((unsigned)((x)&0x7f) << 0)
#define TC_OP_ATOMIC_SUB_RTN_32 16
#define TC_OP_ATOMIC_SUB_RTN_64 48
#define TC_OP_ATOMIC_CMPSWAP_32 72
#define TC_OP_ATOMIC_SUB_64 112
#define TC_OP_ATOMIC_XOR_64 119
#define ATOMIC_COMMAND(x) ((unsigned)((x)&0x3) << 8)
#define ATOMIC_COMMAND_SEND_RTN 0x0 /* only RTN opcodes */
#define ATOMIC_COMMAND_LOOP 0x1 /* only RTN opcodes */
#define ATOMIC_COMMAND_WR_CONFIRM 0x2 /* only non-RTN opcodes */
#define ATOMIC_COMMAND_SEND_NO_RTN 0x3 /* only non-RTN opcodes */
#define ATOMIC_ENGINE_PFP (1 << 30)
#define PKT3_OCCLUSION_QUERY 0x1F /* GFX7+ */
#define PKT3_SET_PREDICATION 0x20
#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
#define PREDICATION_DRAW_VISIBLE (1 << 8)
#define PREDICATION_HINT_WAIT (0 << 12)
#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
#define PRED_OP(x) ((x) << 16)
#define PREDICATION_OP_CLEAR 0x0
#define PREDICATION_OP_ZPASS 0x1
#define PREDICATION_OP_PRIMCOUNT 0x2
#define PREDICATION_OP_BOOL64 0x3
#define PREDICATION_OP_BOOL32 0x4
#define PREDICATION_CONTINUE (1 << 31)
#define PREDICATION_DRAW_NOT_VISIBLE S_20_1_PRED_BOOL(V_20_1_DRAW_IF_NOT_VISIBLE_OR_OVERFLOW)
#define PREDICATION_DRAW_VISIBLE S_20_1_PRED_BOOL(V_20_1_DRAW_IF_VISIBLE_OR_NO_OVERFLOW)
#define PREDICATION_HINT_WAIT S_20_1_HINT(V_20_1_WAIT_UNTIL_FINAL_ZPASS_WRITTEN)
#define PREDICATION_HINT_NOWAIT_DRAW S_20_1_HINT(V_20_1_DRAW_IF_NOT_FINAL_ZPASS_WRITTEN)
#define PREDICATION_OP_CLEAR V_20_1_CLEAR_PREDICATE
#define PREDICATION_OP_ZPASS V_20_1_SET_ZPASS_PREDICATE
#define PREDICATION_OP_PRIMCOUNT V_20_1_SET_PRIMCOUNT_PREDICATE
#define PREDICATION_OP_BOOL64 V_20_1_DX12
#define PREDICATION_OP_BOOL32 V_20_1_VULKAN
#define PREDICATION_CONTINUE S_20_1_CONTINUE_BIT(V_20_1_CONTINUE_SET_PREDICATION)
#define PKT3_COND_EXEC 0x22
#define COND_EXEC_USERQ_OVERRULE_CMD (1 << 31)
#define PKT3_PRED_EXEC 0x23
#define PKT3_DRAW_INDIRECT 0x24
#define PKT3_DRAW_INDEX_INDIRECT 0x25
#define PKT3_INDEX_BASE 0x26
#define PKT3_DRAW_INDEX_2 0x27
#define PKT3_CONTEXT_CONTROL 0x28
#define CC0_LOAD_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0)
#define CC0_LOAD_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1)
#define CC0_LOAD_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15)
#define CC0_LOAD_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16)
#define CC0_LOAD_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24)
#define CC0_LOAD_CE_RAM(x) (((unsigned)(x)&0x1) << 28)
#define CC0_UPDATE_LOAD_ENABLES(x) (((unsigned)(x)&0x1) << 31)
#define CC1_SHADOW_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0)
#define CC1_SHADOW_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1)
#define CC1_SHADOW_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15)
#define CC1_SHADOW_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16)
#define CC1_SHADOW_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24)
#define CC1_UPDATE_SHADOW_ENABLES(x) (((unsigned)(x)&0x1) << 31)
#define PKT3_INDEX_TYPE 0x2A /* GFX6-8 */
#define PKT3_DRAW_INDIRECT_MULTI 0x2C
#define R_2C3_DRAW_INDEX_LOC 0x2C3
#define S_2C3_THREAD_TRACE_MARKER_ENABLE(x) (((unsigned)(x)&0x1) << 29)
#define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x)&0x1) << 30)
#define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x)&0x1) << 31)
#define PKT3_DRAW_INDEX_AUTO 0x2D
#define PKT3_DRAW_INDEX_IMMD 0x2E /* GFX6 only */
#define PKT3_NUM_INSTANCES 0x2F
@ -130,9 +95,6 @@
#define WAIT_REG_MEM_PFP (1 << 8)
#define PKT3_MEM_WRITE 0x3D /* GFX6 only */
#define PKT3_INDIRECT_BUFFER 0x3F /* GFX6+ */
#define S_3F3_INHERIT_VMID_MQD_GFX(x) (((unsigned)(x)&0x1) << 22) /* userqueue only */
#define S_3F3_VALID_COMPUTE(x) (((unsigned)(x)&0x1) << 23) /* userqueue only */
#define S_3F3_INHERIT_VMID_MQD_COMPUTE(x) (((unsigned)(x)&0x1) << 30) /* userqueue only */
#define PKT3_COPY_DATA 0x40
#define COPY_DATA_SRC_SEL(x) ((x)&0xf)
#define COPY_DATA_REG 0
@ -164,7 +126,7 @@
#define PKT3_ME_INITIALIZE 0x44 /* GFX6 only */
#define PKT3_COND_WRITE 0x45
#define PKT3_EVENT_WRITE 0x46
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_TYPE(x) S_46_1_EVENT_TYPE(x)
/* 0 - any non-TS event
* 1 - ZPASS_DONE
* 2 - SAMPLE_PIPELINESTAT
@ -172,7 +134,7 @@
* 4 - *S_PARTIAL_FLUSH
* 5 - TS events
*/
#define EVENT_INDEX(x) ((x) << 8)
#define EVENT_INDEX(x) S_46_1_EVENT_INDEX(x)
#define PIXEL_PIPE_STATE_CNTL_COUNTER_ID(x) ((x) << 3)
#define PIXEL_PIPE_STATE_CNTL_STRIDE(x) ((x) << 9)
/* 0 - 32 bits

View file

@ -849,37 +849,6 @@
}
}
},
"CLEAR_STATE": {
"enum": {
"cmd": {
"push_state": {
"value": 1
},
"pop_state": {
"value": 2
}
}
},
"word": {
"1": {
"header": "PM4_TYPE_3_HEADER"
},
"2": {
"a": {
"cmd": {
"bits": "3:0",
"bits_str": "3:0",
"bits_int": 4
},
"reserved17": {
"bits": "31:4",
"bits_int": 28,
"bits_str": "31:4"
}
}
}
}
},
"CLEANER_SHADER": {
"word": {
"1": {
@ -7547,37 +7516,6 @@
}
}
},
"CLEAR_STATE": {
"enum": {
"cmd": {
"push_state": {
"value": 1
},
"pop_state": {
"value": 2
}
}
},
"word": {
"1": {
"header": "PM4_TYPE_3_HEADER"
},
"2": {
"a": {
"cmd": {
"bits": "3:0",
"bits_str": "3:0",
"bits_int": 4
},
"reserved15": {
"bits": "31:4",
"bits_int": 28,
"bits_str": "31:4"
}
}
}
}
},
"COND_WRITE": {
"enum": {
"function": {

View file

@ -4,23 +4,94 @@
# SPDX-License-Identifier: MIT
"""
The parameters must be specified in the following order and must contain 'gfx'. The gfx version
must be the last parameter. Only the header file for the specified gfx version is generated.
All other input files are only used to resolve definition conflicts. The generated header file
is written to stdout.
The last parameter determines which file is generated.
Parameters:
cp_pm4_table_data_gfx$1.json
pm4_it_opcodes_gfx$1.h
...
cp_pm4_table_data_gfx$N.json
pm4_it_opcodes_gfx$N.h
gfx$VERSION (e.g. 'gfx11')
If the last parameter is 'packets_h':
The header file with packet definitions is generated. The parameters must be specified
in the following order and must contain 'gfx'. The gfx version must be the second last
parameter. All other input files are only used to resolve definition conflicts.
Parameters:
cp_pm4_table_data_gfx$1.json
pm4_it_opcodes_gfx$1.h
...
cp_pm4_table_data_gfx$N.json
pm4_it_opcodes_gfx$N.h
gfx$VERSION (e.g. 'gfx11')
packets_h
If the last parameter is 'print_h' or 'print_c':
The packet parser is generated.
Parameters:
cp_pm4_table_data_gfx$N.json
pm4_it_opcodes_gfx$N.h
print_h OR print_c
"""
import sys, json, re
# The printer doesn't print certain variable-length packets, register-setting packets, and packets
# requiring custom printing code.
no_printer_support = {
'NOP',
'FENCE_WAIT_MULTI',
'INDIRECT_BUFFER',
'SET_CONFIG_REG',
'SET_CONTEXT_REG',
'SET_CONTEXT_REG_PAIRS',
'SET_CONTEXT_REG_PAIRS_PACKED',
'SET_SH_REG',
'SET_SH_REG_INDEX',
'SET_SH_REG_PAIRS',
'SET_SH_REG_PAIRS_PACKED',
'SET_SH_REG_PAIRS_PACKED_N',
'SET_UCONFIG_REG',
'SET_UCONFIG_REG_INDEX',
}
# Packet fields that should be printed as registers.
packet_field_register_map = {
# (name, first_bit): (register, mask)
('COHER_CNTL', 0): ('R_0301F0_CP_COHER_CNTL', ~0),
('EVENT_TYPE', 0): ('R_028A90_VGT_EVENT_INITIATOR', 0x3F),
('GCR_CNTL', 0): ('R_586_GCR_CNTL', ~0),
('DISPATCH_INITIATOR', 0): ('R_00B800_COMPUTE_DISPATCH_INITIATOR', ~0),
('DRAW_INITIATOR', 0): ('R_0287F0_VGT_DRAW_INITIATOR', ~0),
}
# Packet fields that are addresses for invoking ac_ib_handle_address.
# The whole dword must be the whole address_hi field, and the whole previous dword must be
# the whole address_lo field.
address_field_map = {
# address_hi field: (packet list, condition, count)
# - If the packet list is not empty, ac_ib_handle_address is only called for these packets.
# - If the condition is not empty, it determines whether the dwords contain an address.
# (if the condition is missing, the packet word must have only 1 variant)
# - If the count is not empty, it must be the code that returns the byte count for ac_ib_handle_address.
'ADDR_HI': ([], '', ''),
'CONTROL_BUF_ADDR_HI': ([], '', ''),
'COUNT_ADDR_HI': ([], '', ''),
'DST_MEM_ADDR_HI': ([], ('G_37_1_DST_SEL(dw0) == V_37_1_MEMORY_SYNC_ACROSS_GRBM || ' +
'G_37_1_DST_SEL(dw0) == V_37_1_TC_L2 || ' +
'G_37_1_DST_SEL(dw0) == V_37_1_MEMORY'), ''),
'INDEX_BASE_HI': ([], '', ''),
'DST_ADDR_HI': (['DMA_DATA'],
('G_50_1_DST_SEL(dw0) == V_50_1_DST_ADDR_USING_DAS || ' +
'G_50_1_DST_SEL(dw0) == V_50_1_DST_ADDR_USING_L2'),
'G_50_6_BYTE_COUNT(dw5)'),
'SRC_ADDR_HI': (['DMA_DATA'],
('G_50_1_SRC_SEL(dw0) == V_50_1_SRC_ADDR_USING_SAS || ' +
'G_50_1_SRC_SEL(dw0) == V_50_1_SRC_ADDR_USING_L2'),
'G_50_6_BYTE_COUNT(dw5)'),
'ADDRESS_HI': (['EVENT_WRITE', 'SET_BASE'],
'opcode != PKT3_EVENT_WRITE || G_46_1_EVENT_TYPE(dw0) != V_028A90_PIXEL_PIPE_STAT_CONTROL',
''),
}
engines_dict = {'pfp': 0, 'meg': 1, 'mec': 2}
@ -199,7 +270,11 @@ def print2(s1, s2):
print(s1.ljust(80) + s2)
def main():
re_opcode = re.compile(r"^\s*IT_(?P<name>\w+)\s*=\s*(?P<hex>0x[\da-fA-F]+),*$")
re_gfx_number = re.compile(r"gfx(\d+)")
def print_packet_definitions():
assert len(sys.argv) % 2 == 0 # argv = executable, N*2 input files, gfx$VERSION
num_gfx_versions = (len(sys.argv) - 2) // 2
assert num_gfx_versions > 0
@ -211,9 +286,6 @@ def main():
gfx_versions = {}
gfx_opcodes = {}
re_gfx_number = re.compile(r"gfx(\d+)")
re_opcode = re.compile(r"^\s*IT_(?P<name>\w+)\s*=\s*(?P<hex>0x[\da-fA-F]+),*$")
for i in range(num_gfx_versions):
packet_filename = sys.argv[1 + i * 2]
opcode_filename = sys.argv[1 + i * 2 + 1]
@ -370,5 +442,346 @@ def main():
str(value_int) + value_comment)
def packet_has_engine_sel(packet_dict):
    """Return True if the packet has both PFP and MEG variants and the PFP
    variant's second word defines an 'engine_sel' field."""
    if 'pfp' not in packet_dict or 'meg' not in packet_dict:
        return False
    pfp_word2_fields = packet_dict['pfp']['word']['2']['a']
    return 'engine_sel' in pfp_word2_fields
def print_enum_table(packet_name, packet_dict):
# Emit one C array of value-name strings per (engine, packet, enum field),
# named "<engine>_<packet>_<field>", for use by the generated printer.
# Conflicting definitions of the same numeric value are reported as a C
# comment in the output instead of failing the build.
# Gather a merged enum table from all engines.
for engine_name, packet in packet_dict.items():
if 'enum' not in packet:
continue
# Packets that have both PFP and MEG definitions and don't have ENGINE_SEL are parsed as PFP,
# so ignore MEG enums.
if engine_name == 'meg' and 'pfp' in packet_dict and not packet_has_engine_sel(packet_dict):
continue;
# NOTE(review): the 'enum' key is guaranteed present here (checked above),
# so the conditional below always takes the first branch.
enums = packet['enum'] if 'enum' in packet else {}
table = {}
for field_name, values in enums.items():
assert len(values) > 0
if field_name not in table:
table[field_name] = {}
for value_name, value_item in values.items():
value = value_item['value']
# Reserved values get no string; the printer asserts they never occur.
if value_name.startswith('reserved'):
continue
if value in table[field_name]:
if table[field_name][value] != value_name:
print('// Enum conflict: Packet %s field %s has value %d = %s, but the table already has %d = %s' %
(packet_name, field_name, value, value_name.upper(), value, table[field_name][value].upper()))
else:
table[field_name][value] = value_name
# Emit the designated-initializer C string arrays for this engine's fields.
for field_name, values in table.items():
print('')
print('static const char *%s_%s_%s[] = {' % (engine_name, packet_name, field_name))
for value, value_name in table[field_name].items():
print(3 * ' ' + '[%d] = "%s",' % (value, value_name.upper()))
print('};')
def print_packet(packet_name, packet_dict, engine_name, dword0_read):
# Emit the C statements that parse and print one packet for one engine.
#   packet_name: opcode name, e.g. 'DMA_DATA'
#   packet_dict: engine name -> packet definition (from get_packet_dict)
#   engine_name: 'pfp', 'meg' or 'mec'
#   dword0_read: True when the caller already emitted
#                'uint32_t dw0 = ac_ib_get(ib);' (the ENGINE_SEL check case)
if engine_name not in packet_dict:
# The packet has no definition for this engine: generate code that
# reports the problem at runtime instead of silently misparsing.
print(9 * ' ' + 'fprintf(stderr, "amdgpu: packet %s is not supported by %s\\n");' %
(packet_name, engine_name.upper()))
print(9 * ' ' + 'assert(0 && "packet %s is not supported by %s");' % (packet_name, engine_name.upper()))
return
packet = packet_dict[engine_name]
enums = packet['enum'] if 'enum' in packet else {}
words = packet['word']
seen_variable_length_word = False
# Some packets need dwords to be loaded first if the byte count is after the address words.
load_dwords_first = packet_name == 'DMA_DATA'
if load_dwords_first:
for word_index, word_variants in words.items():
if int(word_index) == 1:
continue # it's the packet header.
if int(word_index) == 2 and dword0_read:
continue
# Don't load any variable-length fields here.
has_variable_length_field = False
for _, word_variant in word_variants.items():
has_variable_length_field = has_variable_length_field or len([x for x in word_variant.keys() if '[]' in x]) > 0
if has_variable_length_field:
continue
# JSON word indices are 1-based and include the header; the generated
# C uses 0-based dwN names that exclude the header.
word_index_0based = int(word_index) - 2
if len(word_variants) == 0:
print(9 * ' ' + 'if (%d <= pkt_count_field) ac_ib_get(ib);' % word_index_0based)
else:
# Words past the packet's actual count read as 0.
print(9 * ' ' + 'uint32_t dw%d = %d <= pkt_count_field ? ac_ib_get(ib) : 0;' % (word_index_0based, word_index_0based))
# Print the dwords.
for word_index, word_variants in words.items():
if int(word_index) == 1:
continue # it's the packet header.
get_dword = (int(word_index) > 2 or not dword0_read) and not load_dwords_first
word_index_0based = int(word_index) - 2
dword_var = 'dw%d' % word_index_0based
# Parse the dword.
for word_variant_name, word_variant in word_variants.items():
# When a word has several layout variants, prefix printed field names
# with the variant name so the output stays unambiguous.
prefix = ('[%s]' % word_variant_name.upper()) if len(word_variants) > 1 else ''
num_printed_fields = len([field_name for field_name in word_variant.keys()
if not field_name.startswith('reserved') and not field_name.startswith('dummy')])
# If any field (it should be exactly one field) contains [], it's a variable-length packet.
num_var_length_fields = len([x for x in word_variant.keys() if '[]' in x])
if num_var_length_fields > 0:
assert num_var_length_fields == 1
seen_variable_length_word = True
if packet_name == 'WRITE_DATA':
assert word_index_0based == 3
print(9 * ' ' + 'for (unsigned i = 0; i < pkt_count_field - 3; i++)')
print(12 * ' ' + 'ac_print_data_dword(ib->f, ac_ib_get(ib), "data");')
else:
assert False, 'unexpected variable-length packet: %s' % packet_name
continue
# Variable-length data must be the packet's last word.
assert not seen_variable_length_word
# Get the next dword if needed.
if get_dword:
if word_index_0based > 0:
print('')
if len(word_variant) == 0:
print(9 * ' ' + 'ac_ib_get(ib);')
else:
print(9 * ' ' + 'uint32_t %s = ac_ib_get(ib);' % dword_var)
get_dword = False
# Iterate over all fields.
for field_name, field in word_variant.items():
# Get field bits.
first_bit, last_bit = get_field_bits(field)
num_bits = last_bit - first_bit + 1
bitmask = (1 << num_bits) - 1
if field_name.startswith('reserved') or field_name.startswith('dummy'):
# If a word has multiple variants, a reserved field in one variant may be used by another variant,
# and we don't know which word variant is used, so ignore reserved fields.
if len(word_variants) == 1:
if num_bits == 32:
print(9 * ' ' + 'assert(!%s && "reserved packet fields should be 0 for %s, word %d");' %
(dword_var, packet_name, word_index_0based))
else:
print(9 * ' ' + 'assert(!((%s >> %d) & 0x%x) && "reserved packet fields should be 0 for %s, word %d");' %
(dword_var, first_bit, bitmask, packet_name, word_index_0based))
continue
# Some address fields don't use the first 2-3 bits. Include them anyway.
if num_printed_fields == 1 and first_bit + num_bits == 32 and first_bit <= 8:
num_bits = 32
# Extract the field value if needed.
if num_bits < 32:
field_var = '%s%s_%s' % (dword_var, '' if len(word_variants) == 1 else word_variant_name.upper(), field_name)
print(9 * ' ' + 'uint32_t %s = (%s >> %d) & 0x%x;' % (field_var, dword_var, first_bit, bitmask))
else:
field_var = dword_var
register_map_key = (field_name.upper(), first_bit)
# Choose one of the methods of printing the field
if field_name in enums:
# Print it as an enum value string
enum_array = '%s_%s_%s' % (engine_name, packet_name, field_name)
value_name_var = '%s_str' % field_var
print(9 * ' ' + 'const char *%s = %s < ARRAY_SIZE(%s) ?' % (value_name_var, field_var, enum_array));
print(9 * ' ' + '   %s[%s] : NULL;' % (enum_array, field_var))
print(9 * ' ' + 'assert(%s && "invalid/reserved values shouldn\'t be present");' % value_name_var)
print(9 * ' ' + 'ac_print_string_value(ib->f, "%s%s", %s);' % (prefix, field_name.upper(), value_name_var))
elif register_map_key in packet_field_register_map:
# Print it as a register
reg_name, mask = packet_field_register_map[register_map_key]
print(9 * ' ' + 'ac_dump_reg(ib->f, ib->gfx_level, ib->family, %s, %s, %s);' %
(reg_name, field_var, hex(mask) if mask >= 0 else '~0'))
else:
# Print it as a regular value
print(9 * ' ' + 'ac_print_named_value(ib->f, "%s%s", %s, %d);' %
(prefix, field_name.upper(), field_var, num_bits))
# If the field is an address, invoke ac_ib_handle_address.
if field_name.upper() in address_field_map:
packet_list, addr_condition, count = address_field_map[field_name.upper()]
indent = 9
if len(packet_list) == 0 or packet_name in packet_list:
# A conditional address needs a single word variant unless the
# condition itself disambiguates.
assert len(addr_condition) > 0 or len(word_variants) == 1
if len(addr_condition) > 0:
print(9 * ' ' + 'if (%s)' % addr_condition)
indent = 12
# The address low dword is always the immediately preceding word.
print(indent * ' ' + 'ac_ib_handle_address(ib, %s, %s, %s);' %
('dw%d' % (word_index_0based - 1), dword_var, '0' if count == '' else count))
# Stop printing if that was the last word of the packet.
if word_index_0based < len(words) - 2:
print(9 * ' ' + 'if (pkt_count_field == %d) break;' % word_index_0based)
def should_skip_packet(packet_name):
    # Packets the generator must not emit a parser case for.
    # TODO: COND_INDIRECT_BUFFER shares its opcode number with
    # INDIRECT_BUFFER; it stays skipped until that conflict is handled.
    skipped = ('COND_INDIRECT_BUFFER',)
    return packet_name in skipped
def get_packet_dict(engines, packet_name):
    # Collect the per-engine definitions of one packet, keyed by engine name.
    # Engines that don't define the packet are simply absent from the result.
    return {engine: packets[packet_name]
            for engine, packets in engines.items()
            if packet_name in packets}
def print_packet_parser(is_header):
# Generate amd_cp_print_packet_<gfx>.h (is_header=True) or the matching .c
# (is_header=False) from sys.argv[1] (packet JSON) and sys.argv[2] (opcode
# enum header). Output goes to stdout.
gfx_version = 'gfx' + re_gfx_number.search(sys.argv[1]).group(1)
# Load the packet file
engines = json.load(open(sys.argv[1], 'r', encoding='utf-8'))['pm4_packets']
# Load the opcode file
opcode_file = open(sys.argv[2], 'r', encoding='utf-8')
opcodes = {}
for line in opcode_file:
match = re_opcode.match(line)
if match:
opcodes[match['name']] = int(match['hex'], 16)
print(
"""/* This file is automatically generated. DO NOT EDIT.
 *
 * Copyright 2026 Advanced Micro Devices, Inc.
 * SPDX-License-Identifier: MIT
 */
""")
if is_header:
print('#ifndef AMD_CP_IB_PARSER_%s' % gfx_version.upper())
print('#define AMD_CP_IB_PARSER_%s' % gfx_version.upper())
print('')
print('#include "ac_debug.h"')
else:
print('#include "amd_cp_print_packet_%s.h"' % gfx_version)
print('#include "amd_cp_packets_%s.h"' % gfx_version)
print('#include "amdgfxregs.h"')
# Generate enum-to-string tables.
if not is_header:
for packet_name, value in opcodes.items():
if not should_skip_packet(packet_name) and packet_name not in no_printer_support:
print_enum_table(packet_name, get_packet_dict(engines, packet_name))
print('')
print('/* Print the packet and use assertions to validate its content. */')
print('void')
print('amd_cp_print_packet_%s(struct ac_ib_parser *ib, unsigned opcode, unsigned pkt_count_field)%s'
% (gfx_version, ';' if is_header else ''))
if is_header:
print('')
print('#endif')
return
print('{')
print(3 * ' ' + 'switch (opcode) {')
# Generate packet parser cases.
for packet_name, value in opcodes.items():
# Skipped packets are still emitted, but inside "#if 0" so the
# definitions remain visible for reference.
skip_packet = should_skip_packet(packet_name)
if skip_packet:
print('#if 0')
packet_dict = get_packet_dict(engines, packet_name)
print(3 * ' ' + 'case 0x%X: { /* PKT3_%s */' % (value, packet_name))
if packet_name in no_printer_support:
print(6 * ' ' + 'UNREACHABLE("the caller should handle %s");' % packet_name)
else:
has_engine_sel = packet_has_engine_sel(packet_dict)
if has_engine_sel:
# ENGINE_SEL lives in dw0, so it must be read before dispatching.
print(6 * ' ' + 'uint32_t dw0 = ac_ib_get(ib);')
print('')
print(6 * ' ' + 'if (ib->ip_type == AMD_IP_COMPUTE) {')
if has_engine_sel:
# Generate an expression that checks ENGINE_SEL
engine_sel_infix = ('%X_1%s' %
(opcodes[packet_name], '' if len(packet_dict['pfp']['word']['2']) == 1 else 'A'))
engine_sel_getter = 'G_%s_ENGINE_SEL' % engine_sel_infix
if 'pfp' in packet_dict['pfp']['enum']['engine_sel']:
pfp_value_name = 'PFP'
elif 'prefetch_parser' in packet_dict['pfp']['enum']['engine_sel']:
pfp_value_name = 'PREFETCH_PARSER'
else:
# NOTE(review): the '' inside this single-quoted string is Python
# adjacent-string concatenation, not an escaped quote — the message
# actually reads "doesnt". Intentional? Probably meant "doesn't".
assert False, 'ENGINE_SEL doesn''t contain PFP or PREFETCH_PARSER'
pfp_value = 'V_%s_%s' % (engine_sel_infix, pfp_value_name)
print_packet(packet_name, packet_dict, 'mec', True)
# Parse both PFP and MEG packet variants.
print(6 * ' ' + '} else if (%s(dw0) == %s) {' % (engine_sel_getter, pfp_value))
print_packet(packet_name, packet_dict, 'pfp', True)
print(6 * ' ' + '} else {')
print_packet(packet_name, packet_dict, 'meg', True)
else:
print_packet(packet_name, packet_dict, 'mec', False)
print(6 * ' ' + '} else {')
print_packet(packet_name, packet_dict, 'pfp' if 'pfp' in packet_dict else 'meg', False)
print(6 * ' ' + '}')
print(6 * ' ' + 'break;')
print(3 * ' ' + '}')
if skip_packet:
print('#endif')
print('')
print(3 * ' ' + 'default:')
print(6 * ' ' + 'fprintf(stderr, "amdgpu: cannot decode packet 0x%x\\n", opcode);')
print(6 * ' ' + 'break;')
print(3 * ' ' + '}')
print('}')
if __name__ == "__main__":
    # The last command-line argument selects which artifact to generate;
    # pop it so the entry points see only the input-file arguments.
    last = sys.argv.pop()
    if last == 'packets_h':
        print_packet_definitions()
    elif last == 'print_c':
        print_packet_parser(False)
    elif last == 'print_h':
        print_packet_parser(True)
    else:
        # Fixed: the old message named "header"/"parser", which are not the
        # values this dispatch accepts.
        assert False, 'the last parameter must be "packets_h", "print_c", or "print_h"'

View file

@ -8,7 +8,6 @@
enum IT_OpCodeType {
IT_NOP = 0x10,
IT_SET_BASE = 0x11,
IT_CLEAR_STATE = 0x12,
IT_INDEX_BUFFER_SIZE = 0x13,
IT_DISPATCH_DIRECT = 0x15,
IT_DISPATCH_INDIRECT = 0x16,

View file

@ -10609,8 +10609,8 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index
radeon_emit(0);
radeon_emit(vertex_offset_reg);
radeon_emit(start_instance_reg);
radeon_emit(draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C3_COUNT_INDIRECT_ENABLE(!!count_va) |
S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en));
radeon_emit(draw_id_reg | S_2C_4_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C_4_COUNT_INDIRECT_ENABLE(!!count_va) |
S_2C_4_THREAD_TRACE_MARKER_ENABLE(sqtt_en));
radeon_emit(draw_count); /* count */
radeon_emit(count_va); /* count_addr */
radeon_emit(count_va >> 32);
@ -15060,7 +15060,7 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi
if (va) {
assert(pred_op == PREDICATION_OP_BOOL32 || pred_op == PREDICATION_OP_BOOL64);
op = PRED_OP(pred_op);
op = S_20_1_PRED_OP(pred_op);
/* PREDICATION_DRAW_VISIBLE means that if the 32-bit value is zero, all
* rendering commands are discarded. Otherwise, they are discarded if

View file

@ -1410,8 +1410,8 @@ dgc_emit_pkt3_draw_indirect(struct dgc_cmdbuf *cs, nir_def *has_drawid, bool ind
dgc_cs_emit_imm(0);
dgc_cs_emit(vertex_offset_reg);
dgc_cs_emit(nir_bcsel(b, has_baseinstance, start_instance_reg, nir_imm_int(b, 0)));
dgc_cs_emit(nir_ior_imm(b, nir_ior(b, draw_id_reg, nir_imm_int(b, S_2C3_DRAW_INDEX_ENABLE(1))),
S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
dgc_cs_emit(nir_ior_imm(b, nir_ior(b, draw_id_reg, nir_imm_int(b, S_2C_4_DRAW_INDEX_ENABLE(1))),
S_2C_4_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
dgc_cs_emit_imm(1); /* draw count */
dgc_cs_emit_imm(0); /* count va low */
dgc_cs_emit_imm(0); /* count va high */
@ -1542,7 +1542,7 @@ dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *s
nir_def *start_instance_reg =
nir_bcsel(b, has_baseinstance, nir_iadd(b, vertex_offset_reg, start_instance_offset), nir_imm_int(b, 0));
nir_def *draw_id_reg = nir_bcsel(
b, has_drawid, nir_ior_imm(b, nir_iadd(b, vertex_offset_reg, nir_imm_int(b, 1)), S_2C3_DRAW_INDEX_ENABLE(1)),
b, has_drawid, nir_ior_imm(b, nir_iadd(b, vertex_offset_reg, nir_imm_int(b, 1)), S_2C_4_DRAW_INDEX_ENABLE(1)),
nir_imm_int(b, 0));
nir_def *di_src_sel = nir_imm_int(b, indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX);
@ -1555,7 +1555,7 @@ dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *s
dgc_cs_emit_imm(0);
dgc_cs_emit(vertex_offset_reg);
dgc_cs_emit(start_instance_reg);
dgc_cs_emit(nir_ior_imm(b, draw_id_reg, S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
dgc_cs_emit(nir_ior_imm(b, draw_id_reg, S_2C_4_THREAD_TRACE_MARKER_ENABLE(sqtt_en)));
dgc_cs_emit(draw_count);
dgc_cs_emit_imm(0);
dgc_cs_emit_imm(0);

View file

@ -686,8 +686,8 @@ radv_emit_graphics(struct radv_device *device, struct radv_cmd_stream *cs)
if (!device->uses_shadow_regs) {
ac_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1));
ac_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1));
ac_pm4_cmd_add(pm4, S_28_1_UPDATE_LOAD_ENABLES(1));
ac_pm4_cmd_add(pm4, S_28_2_UPDATE_SHADOW_ENABLES(1));
if (has_clear_state) {
ac_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0));
@ -1543,7 +1543,7 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool
if (!unlock) {
uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET;
ac_emit_cp_atomic_mem(cs->b, TC_OP_ATOMIC_CMPSWAP_32, ATOMIC_COMMAND_LOOP, mutex_va, 1, 0);
ac_emit_cp_atomic_mem(cs->b, V_1E_1_GL2_OP_ATOMIC_CMPSWAP_32, V_1E_1_LOOP_UNTIL_COMPARE_SATISFIED, mutex_va, 1, 0);
}
uint64_t va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET;

View file

@ -29,13 +29,13 @@ bool si_init_cp_reg_shadowing(struct si_context *sctx)
}
ac_pm4_cmd_add(shadowing_pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(shadowing_pm4, CC0_UPDATE_LOAD_ENABLES(1) |
CC0_LOAD_PER_CONTEXT_STATE(1) | CC0_LOAD_CS_SH_REGS(1) |
CC0_LOAD_GFX_SH_REGS(1) | CC0_LOAD_GLOBAL_UCONFIG(1));
ac_pm4_cmd_add(shadowing_pm4, CC1_UPDATE_SHADOW_ENABLES(1) |
CC1_SHADOW_PER_CONTEXT_STATE(1) | CC1_SHADOW_CS_SH_REGS(1) |
CC1_SHADOW_GFX_SH_REGS(1) | CC1_SHADOW_GLOBAL_UCONFIG(1) |
CC1_SHADOW_GLOBAL_CONFIG(1));
ac_pm4_cmd_add(shadowing_pm4, S_28_1_UPDATE_LOAD_ENABLES(1) |
S_28_1_LOAD_PER_CONTEXT_STATE(1) | S_28_1_LOAD_CS_SH_REGS(1) |
S_28_1_LOAD_GFX_SH_REGS(1) | S_28_1_LOAD_GLOBAL_UCONFIG(1));
ac_pm4_cmd_add(shadowing_pm4, S_28_2_UPDATE_SHADOW_ENABLES(1) |
S_28_2_SHADOW_PER_CONTEXT_STATE(1) | S_28_2_SHADOW_CS_SH_REGS(1) |
S_28_2_SHADOW_GFX_SH_REGS(1) | S_28_2_SHADOW_GLOBAL_UCONFIG(1) |
S_28_2_SHADOW_GLOBAL_CONFIG(1));
for (unsigned i = 0; i < SI_NUM_REG_RANGES; i++)
ac_build_load_reg(&sctx->screen->info, shadowing_pm4, i,

View file

@ -1083,7 +1083,7 @@ static void si_emit_query_predication(struct si_context *ctx, unsigned index)
struct gfx11_sh_query *gfx10_query = (struct gfx11_sh_query *)query;
struct gfx11_sh_query_buffer *qbuf, *first, *last;
op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
op = S_20_1_PRED_OP(PREDICATION_OP_PRIMCOUNT);
/* if true then invert, see GL_ARB_conditional_render_inverted */
if (!invert)
@ -1131,17 +1131,17 @@ static void si_emit_query_predication(struct si_context *ctx, unsigned index)
struct si_query_buffer *qbuf;
if (query->workaround_buf) {
op = PRED_OP(PREDICATION_OP_BOOL64);
op = S_20_1_PRED_OP(PREDICATION_OP_BOOL64);
} else {
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
op = PRED_OP(PREDICATION_OP_ZPASS);
op = S_20_1_PRED_OP(PREDICATION_OP_ZPASS);
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
op = S_20_1_PRED_OP(PREDICATION_OP_PRIMCOUNT);
invert = !invert;
break;
default:

View file

@ -107,8 +107,8 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs)
switch (ip_type) {
case AMD_IP_GFX:
radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
radeon_emit(S_28_1_UPDATE_LOAD_ENABLES(1));
radeon_emit(S_28_2_UPDATE_SHADOW_ENABLES(1));
break;
case AMD_IP_COMPUTE:
radeon_emit(PKT3(PKT3_NOP, 0, 0));
@ -160,8 +160,8 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs)
switch (ip_type) {
case AMD_IP_GFX:
radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
radeon_emit(S_28_1_UPDATE_LOAD_ENABLES(1));
radeon_emit(S_28_2_UPDATE_SHADOW_ENABLES(1));
break;
case AMD_IP_COMPUTE:
radeon_emit(PKT3(PKT3_NOP, 0, 0));

View file

@ -4868,8 +4868,8 @@ static bool gfx6_init_gfx_preamble_state(struct si_context *sctx)
if (sctx->is_gfx_queue && !sctx->uses_kernelq_reg_shadowing) {
ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1));
ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1));
ac_pm4_cmd_add(&pm4->base, S_28_1_UPDATE_LOAD_ENABLES(1));
ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1));
if (sscreen->dpbb_allowed) {
ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0));
@ -4958,17 +4958,17 @@ static bool gfx10_init_gfx_preamble_state(struct si_context *sctx)
*/
if (sctx->gfx_level != GFX11_5) {
ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1) | CC0_LOAD_PER_CONTEXT_STATE(1) |
CC0_LOAD_CS_SH_REGS(1) | CC0_LOAD_GFX_SH_REGS(1) |
CC0_LOAD_GLOBAL_UCONFIG(1));
ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1) | CC1_SHADOW_PER_CONTEXT_STATE(1) |
CC1_SHADOW_CS_SH_REGS(1) | CC1_SHADOW_GFX_SH_REGS(1) |
CC1_SHADOW_GLOBAL_UCONFIG(1) | CC1_SHADOW_GLOBAL_CONFIG(1));
ac_pm4_cmd_add(&pm4->base, S_28_1_UPDATE_LOAD_ENABLES(1) | S_28_1_LOAD_PER_CONTEXT_STATE(1) |
S_28_1_LOAD_CS_SH_REGS(1) | S_28_1_LOAD_GFX_SH_REGS(1) |
S_28_1_LOAD_GLOBAL_UCONFIG(1));
ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1) | S_28_2_SHADOW_PER_CONTEXT_STATE(1) |
S_28_2_SHADOW_CS_SH_REGS(1) | S_28_2_SHADOW_GFX_SH_REGS(1) |
S_28_2_SHADOW_GLOBAL_UCONFIG(1) | S_28_2_SHADOW_GLOBAL_CONFIG(1));
}
} else if (sctx->is_gfx_queue && !sctx->uses_kernelq_reg_shadowing) {
ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1));
ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1));
ac_pm4_cmd_add(&pm4->base, S_28_1_UPDATE_LOAD_ENABLES(1));
ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1));
if (sscreen->dpbb_allowed) {
ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0));
@ -5039,16 +5039,16 @@ static bool gfx12_init_gfx_preamble_state(struct si_context *sctx)
if (sctx->uses_userq_reg_shadowing) {
ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1) | CC0_LOAD_PER_CONTEXT_STATE(1) |
CC0_LOAD_CS_SH_REGS(1) | CC0_LOAD_GFX_SH_REGS(1) |
CC0_LOAD_GLOBAL_UCONFIG(1));
ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1) | CC1_SHADOW_PER_CONTEXT_STATE(1) |
CC1_SHADOW_CS_SH_REGS(1) | CC1_SHADOW_GFX_SH_REGS(1) |
CC1_SHADOW_GLOBAL_UCONFIG(1) | CC1_SHADOW_GLOBAL_CONFIG(1));
ac_pm4_cmd_add(&pm4->base, S_28_1_UPDATE_LOAD_ENABLES(1) | S_28_1_LOAD_PER_CONTEXT_STATE(1) |
S_28_1_LOAD_CS_SH_REGS(1) | S_28_1_LOAD_GFX_SH_REGS(1) |
S_28_1_LOAD_GLOBAL_UCONFIG(1));
ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1) | S_28_2_SHADOW_PER_CONTEXT_STATE(1) |
S_28_2_SHADOW_CS_SH_REGS(1) | S_28_2_SHADOW_GFX_SH_REGS(1) |
S_28_2_SHADOW_GLOBAL_UCONFIG(1) | S_28_2_SHADOW_GLOBAL_CONFIG(1));
} else if (sctx->is_gfx_queue && !sctx->uses_kernelq_reg_shadowing) {
ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1));
ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1));
ac_pm4_cmd_add(&pm4->base, S_28_1_UPDATE_LOAD_ENABLES(1));
ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1));
}
if (sctx->is_gfx_queue && sscreen->dpbb_allowed && !sctx->uses_userq_reg_shadowing) {

View file

@ -1610,8 +1610,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
radeon_emit((sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit((sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
radeon_emit(((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
S_2C3_DRAW_INDEX_ENABLE(sctx->vs_uses_draw_id) |
S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
S_2C_4_DRAW_INDEX_ENABLE(sctx->vs_uses_draw_id) |
S_2C_4_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
radeon_emit(indirect->draw_count);
radeon_emit(count_va);
radeon_emit(count_va >> 32);

View file

@ -1495,17 +1495,17 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws,
amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo));
amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo) >> 32);
amdgpu_pkt_add_dw(userq->f32_shadowing_ib_pm4_dw | S_3F3_INHERIT_VMID_MQD_GFX(1));
amdgpu_pkt_add_dw(userq->f32_shadowing_ib_pm4_dw | S_3F_3_INHERIT_VMID_PFP(1));
}
amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
amdgpu_pkt_add_dw(csc->chunk_ib[IB_MAIN].va_start);
amdgpu_pkt_add_dw(csc->chunk_ib[IB_MAIN].va_start >> 32);
if (userq->ip_type == AMD_IP_GFX)
amdgpu_pkt_add_dw((csc->chunk_ib[IB_MAIN].ib_bytes / 4) | S_3F3_INHERIT_VMID_MQD_GFX(1));
amdgpu_pkt_add_dw((csc->chunk_ib[IB_MAIN].ib_bytes / 4) | S_3F_3_INHERIT_VMID_PFP(1));
else
amdgpu_pkt_add_dw((csc->chunk_ib[IB_MAIN].ib_bytes / 4) | S_3F3_VALID_COMPUTE(1) |
S_3F3_INHERIT_VMID_MQD_COMPUTE(1));
amdgpu_pkt_add_dw((csc->chunk_ib[IB_MAIN].ib_bytes / 4) | S_3F_3_VALID(1) |
S_3F_3_INHERIT_VMID_MEC(1));
/* Add 8 for release mem packet and 2 for protected fence signal packet.
* Calculcating userq_fence_seq_num this way to match with kernel fence that is
@ -1549,7 +1549,7 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws,
for (unsigned i = 0; i < 1 + DIV_ROUND_UP(num_fences, 4); i++)
*cond_exec_skip_counts[i].count_dw_ptr = (amdgpu_pkt_get_next_wptr() -
cond_exec_skip_counts[i].start_wptr) |
COND_EXEC_USERQ_OVERRULE_CMD;
S_22_4_EXEC_USERQ_OVERRULE_CMD(1);
}
} else {
mesa_loge("amdgpu: unsupported userq ip submission = %d\n", userq->ip_type);

View file

@ -320,7 +320,7 @@ amdgpu_userq_submit_cs_preamble_ib_once(struct radeon_cmdbuf *rcs, struct ac_pm4
amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->cs_preamble_ib_bo));
amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->cs_preamble_ib_bo) >> 32);
amdgpu_pkt_add_dw(pm4->ndw | S_3F3_INHERIT_VMID_MQD_GFX(1));
amdgpu_pkt_add_dw(pm4->ndw | S_3F_3_INHERIT_VMID_PFP(1));
amdgpu_pkt_end();
simple_mtx_unlock(&userq->lock);
@ -367,7 +367,7 @@ amdgpu_userq_f32_init_reg_shadowing(struct radeon_cmdbuf *rcs, struct ac_pm4_sta
amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo));
amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo) >> 32);
amdgpu_pkt_add_dw(pm4->ndw | S_3F3_INHERIT_VMID_MQD_GFX(1));
amdgpu_pkt_add_dw(pm4->ndw | S_3F_3_INHERIT_VMID_PFP(1));
amdgpu_pkt_end();
simple_mtx_unlock(&userq->lock);