From 4478b04a016a714bf5d38f98b675c91e27b70d91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 1 Mar 2026 20:33:32 -0500 Subject: [PATCH 1/4] amd/packets: remove non-existent CLEAR_STATE from gfx12 definitions --- src/amd/packets/cp_pm4_table_data_gfx12.json | 62 -------------------- src/amd/packets/pm4_it_opcodes_gfx12.h | 1 - 2 files changed, 63 deletions(-) diff --git a/src/amd/packets/cp_pm4_table_data_gfx12.json b/src/amd/packets/cp_pm4_table_data_gfx12.json index ca8e0921676..883ebc839cb 100644 --- a/src/amd/packets/cp_pm4_table_data_gfx12.json +++ b/src/amd/packets/cp_pm4_table_data_gfx12.json @@ -849,37 +849,6 @@ } } }, - "CLEAR_STATE": { - "enum": { - "cmd": { - "push_state": { - "value": 1 - }, - "pop_state": { - "value": 2 - } - } - }, - "word": { - "1": { - "header": "PM4_TYPE_3_HEADER" - }, - "2": { - "a": { - "cmd": { - "bits": "3:0", - "bits_str": "3:0", - "bits_int": 4 - }, - "reserved17": { - "bits": "31:4", - "bits_int": 28, - "bits_str": "31:4" - } - } - } - } - }, "CLEANER_SHADER": { "word": { "1": { @@ -7547,37 +7516,6 @@ } } }, - "CLEAR_STATE": { - "enum": { - "cmd": { - "push_state": { - "value": 1 - }, - "pop_state": { - "value": 2 - } - } - }, - "word": { - "1": { - "header": "PM4_TYPE_3_HEADER" - }, - "2": { - "a": { - "cmd": { - "bits": "3:0", - "bits_str": "3:0", - "bits_int": 4 - }, - "reserved15": { - "bits": "31:4", - "bits_int": 28, - "bits_str": "31:4" - } - } - } - } - }, "COND_WRITE": { "enum": { "function": { diff --git a/src/amd/packets/pm4_it_opcodes_gfx12.h b/src/amd/packets/pm4_it_opcodes_gfx12.h index 0404fa9e24c..d3e9fd8f89c 100644 --- a/src/amd/packets/pm4_it_opcodes_gfx12.h +++ b/src/amd/packets/pm4_it_opcodes_gfx12.h @@ -8,7 +8,6 @@ enum IT_OpCodeType { IT_NOP = 0x10, IT_SET_BASE = 0x11, - IT_CLEAR_STATE = 0x12, IT_INDEX_BUFFER_SIZE = 0x13, IT_DISPATCH_DIRECT = 0x15, IT_DISPATCH_INDIRECT = 0x16, From f3fc8b0934881e36db9afde4a384613a293fd7a4 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 1 Mar 2026 21:13:02 -0500 Subject: [PATCH 2/4] amd: generate a packet parser/printer automatically from packet definitions The next commit will enable this. --- src/amd/common/ac_debug.h | 5 + src/amd/common/ac_parse_ib.c | 22 +- src/amd/common/meson.build | 25 +- .../packets/parse_cp_pm4_table_data_json.py | 445 +++++++++++++++++- 4 files changed, 478 insertions(+), 19 deletions(-) diff --git a/src/amd/common/ac_debug.h b/src/amd/common/ac_debug.h index d62f9d77ea2..31ed9796245 100644 --- a/src/amd/common/ac_debug.h +++ b/src/amd/common/ac_debug.h @@ -102,8 +102,13 @@ struct ac_ib_parser { unsigned cur_dw; }; +void ac_print_data_dword(FILE *file, uint32_t value, const char *comment); +void ac_print_named_value(FILE *file, const char *name, uint32_t value, int bits); +void ac_print_string_value(FILE *file, const char *name, const char *value); void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, enum radeon_family family, unsigned offset, uint32_t value, uint32_t field_mask); +uint32_t ac_ib_get(struct ac_ib_parser *ib); +void ac_ib_handle_address(struct ac_ib_parser *ib, uint32_t addr_lo, uint32_t addr_hi, uint32_t size); void ac_parse_ib_chunk(struct ac_ib_parser *ib); void ac_parse_ib(struct ac_ib_parser *ib, const char *name); diff --git a/src/amd/common/ac_parse_ib.c b/src/amd/common/ac_parse_ib.c index e3d3d47c81c..320db7cb1b3 100644 --- a/src/amd/common/ac_parse_ib.c +++ b/src/amd/common/ac_parse_ib.c @@ -96,6 +96,21 @@ static void print_string_value(FILE *file, const char *name, const char *value) fprintf(file, "%s\n", value); } +void ac_print_data_dword(FILE *file, uint32_t value, const char *comment) +{ + print_data_dword(file, value, comment); +} + +void ac_print_named_value(FILE *file, const char *name, uint32_t value, int bits) +{ + print_named_value(file, name, value, bits); +} + +void ac_print_string_value(FILE *file, const char *name, const char *value) +{ + print_string_value(file, name, value); 
+} + void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, enum radeon_family family, unsigned offset, uint32_t value, uint32_t field_mask) { @@ -139,7 +154,7 @@ void ac_dump_reg(FILE *file, enum amd_gfx_level gfx_level, enum radeon_family fa O_COLOR_RESET, value); } -static uint32_t ac_ib_get(struct ac_ib_parser *ib) +uint32_t ac_ib_get(struct ac_ib_parser *ib) { uint32_t v = 0; @@ -227,6 +242,11 @@ static void ac_parse_set_reg_pairs_packed_packet(FILE *f, unsigned count, unsign #define AC_ADDR_SIZE_NOT_MEMORY 0xFFFFFFFF +void ac_ib_handle_address(struct ac_ib_parser *ib, uint32_t addr_lo, uint32_t addr_hi, uint32_t size) +{ + /* stub */ +} + static void print_addr(struct ac_ib_parser *ib, const char *name, uint64_t addr, uint32_t size) { FILE *f = ib->f; diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index f9d8cd85db3..9f2eb3bcf1f 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -33,13 +33,34 @@ amd_packet_files = [ ] amd_packet_headers = [] +amd_ib_parsers = [] foreach gen : ['gfx11', 'gfx12'] amd_packet_headers += custom_target( 'amd_cp_packets_' + gen + '.h', input : ['../packets/parse_cp_pm4_table_data_json.py', amd_packet_files], output : 'amd_cp_packets_' + gen + '.h', - command : [prog_python, '@INPUT@', gen], + command : [prog_python, '@INPUT@', gen, 'packets_h'], + capture : true, + ) + + amd_ib_parsers += custom_target( + 'amd_cp_print_packet_' + gen + '.c', + input : ['../packets/parse_cp_pm4_table_data_json.py', + '../packets/cp_pm4_table_data_' + gen + '.json', + '../packets/pm4_it_opcodes_' + gen + '.h'], + output : 'amd_cp_print_packet_' + gen + '.c', + command : [prog_python, '@INPUT@', gen, 'print_c'], + capture : true, + ) + + amd_ib_parsers += custom_target( + 'amd_cp_print_packet_' + gen + '.h', + input : ['../packets/parse_cp_pm4_table_data_json.py', + '../packets/cp_pm4_table_data_' + gen + '.json', + '../packets/pm4_it_opcodes_' + gen + '.h'], + output : 'amd_cp_print_packet_' + gen + 
'.h', + command : [prog_python, '@INPUT@', gen, 'print_h'], capture : true, ) endforeach @@ -159,7 +180,7 @@ amd_common_files = files( 'nir/ac_nir_prerast_utils.c', 'nir/ac_nir_surface.c', 'nir/ac_nir_surface.h', -) +) + amd_ib_parsers if not with_platform_windows amd_common_files += files( diff --git a/src/amd/packets/parse_cp_pm4_table_data_json.py b/src/amd/packets/parse_cp_pm4_table_data_json.py index 678f534469a..8bbcc8c934d 100644 --- a/src/amd/packets/parse_cp_pm4_table_data_json.py +++ b/src/amd/packets/parse_cp_pm4_table_data_json.py @@ -4,23 +4,94 @@ # SPDX-License-Identifier: MIT """ -The parameters must be specified in the following order and must contain 'gfx'. The gfx version -must be the last parameter. Only the header file for the specified gfx version is generated. -All other input files are only used to resolve definition conflicts. The generated header file -is written to stdout. +The last parameter determines which file is generated. -Parameters: - cp_pm4_table_data_gfx$1.json - pm4_it_opcodes_gfx$1.h - ... - cp_pm4_table_data_gfx$N.json - pm4_it_opcodes_gfx$N.h - gfx$VERSION (e.g. 'gfx11') +If the last parameter is 'packets_h': + The header file with packet definitions is generated. The parameters must be specified + in the following order and must contain 'gfx'. The gfx version must be the second last + parameter. All other input files are only used to resolve definition conflicts. + + Parameters: + cp_pm4_table_data_gfx$1.json + pm4_it_opcodes_gfx$1.h + ... + cp_pm4_table_data_gfx$N.json + pm4_it_opcodes_gfx$N.h + gfx$VERSION (e.g. 'gfx11') + packets_h + +If the last parameter is 'print_h' or 'print_c': + + The packet parser is generated. + + Parameters: + cp_pm4_table_data_gfx$N.json + pm4_it_opcodes_gfx$N.h + print_h OR print_c """ import sys, json, re +# The printer doesn't print certain variable-length packets, register-setting packets, and packets +# requiring custom printing code. 
+no_printer_support = { + 'NOP', + 'FENCE_WAIT_MULTI', + 'INDIRECT_BUFFER', + 'SET_CONFIG_REG', + 'SET_CONTEXT_REG', + 'SET_CONTEXT_REG_PAIRS', + 'SET_CONTEXT_REG_PAIRS_PACKED', + 'SET_SH_REG', + 'SET_SH_REG_INDEX', + 'SET_SH_REG_PAIRS', + 'SET_SH_REG_PAIRS_PACKED', + 'SET_SH_REG_PAIRS_PACKED_N', + 'SET_UCONFIG_REG', + 'SET_UCONFIG_REG_INDEX', +} + +# Packet fields that should be printed as registers. +packet_field_register_map = { + # (name, first_bit): (register, mask) + ('COHER_CNTL', 0): ('R_0301F0_CP_COHER_CNTL', ~0), + ('EVENT_TYPE', 0): ('R_028A90_VGT_EVENT_INITIATOR', 0x3F), + ('GCR_CNTL', 0): ('R_586_GCR_CNTL', ~0), + ('DISPATCH_INITIATOR', 0): ('R_00B800_COMPUTE_DISPATCH_INITIATOR', ~0), + ('DRAW_INITIATOR', 0): ('R_0287F0_VGT_DRAW_INITIATOR', ~0), +} + +# Packet fields that are addresses for invoking ac_ib_handle_address. +# The whole dword must be the whole address_hi field, and the whole previous dword must be +# the whole address_lo field. +address_field_map = { + # address_hi field: (packet list, condition, count) + # - If the packet list is not empty, ac_ib_handle_address is only called for these packets. + # - If the condition is not empty, it determines whether the dwords contain an address. + # (if the condition is missing, the packet word must have only 1 variant) + # - If the count is not empty, it must be the code that returns the byte count for ac_ib_handle_address. 
+ 'ADDR_HI': ([], '', ''), + 'CONTROL_BUF_ADDR_HI': ([], '', ''), + 'COUNT_ADDR_HI': ([], '', ''), + 'DST_MEM_ADDR_HI': ([], ('G_37_1_DST_SEL(dw0) == V_37_1_MEMORY_SYNC_ACROSS_GRBM || ' + + 'G_37_1_DST_SEL(dw0) == V_37_1_TC_L2 || ' + + 'G_37_1_DST_SEL(dw0) == V_37_1_MEMORY'), ''), + 'INDEX_BASE_HI': ([], '', ''), + 'DST_ADDR_HI': (['DMA_DATA'], + ('G_50_1_DST_SEL(dw0) == V_50_1_DST_ADDR_USING_DAS || ' + + 'G_50_1_DST_SEL(dw0) == V_50_1_DST_ADDR_USING_L2'), + 'G_50_6_BYTE_COUNT(dw5)'), + 'SRC_ADDR_HI': (['DMA_DATA'], + ('G_50_1_SRC_SEL(dw0) == V_50_1_SRC_ADDR_USING_SAS || ' + + 'G_50_1_SRC_SEL(dw0) == V_50_1_SRC_ADDR_USING_L2'), + 'G_50_6_BYTE_COUNT(dw5)'), + 'ADDRESS_HI': (['EVENT_WRITE', 'SET_BASE'], + 'opcode != PKT3_EVENT_WRITE || G_46_1_EVENT_TYPE(dw0) != V_028A90_PIXEL_PIPE_STAT_CONTROL', + ''), +} + + engines_dict = {'pfp': 0, 'meg': 1, 'mec': 2} @@ -199,7 +270,11 @@ def print2(s1, s2): print(s1.ljust(80) + s2) -def main(): +re_opcode = re.compile(r"^\s*IT_(?P<name>\w+)\s*=\s*(?P<hex>0x[\da-fA-F]+),*$") +re_gfx_number = re.compile(r"gfx(\d+)") + + +def print_packet_definitions(): assert len(sys.argv) % 2 == 0 # argv = executable, N*2 input files, gfx$VERSION num_gfx_versions = (len(sys.argv) - 2) // 2 assert num_gfx_versions > 0 @@ -211,9 +286,6 @@ def main(): gfx_versions = {} gfx_opcodes = {} - re_gfx_number = re.compile(r"gfx(\d+)") - re_opcode = re.compile(r"^\s*IT_(?P<name>\w+)\s*=\s*(?P<hex>0x[\da-fA-F]+),*$") - for i in range(num_gfx_versions): packet_filename = sys.argv[1 + i * 2] opcode_filename = sys.argv[1 + i * 2 + 1] @@ -370,5 +442,346 @@ def main(): str(value_int) + value_comment) +def packet_has_engine_sel(packet_dict): + return ('pfp' in packet_dict and 'meg' in packet_dict and + 'engine_sel' in packet_dict['pfp']['word']['2']['a']) + + +def print_enum_table(packet_name, packet_dict): + # Gather a merged enum table from all engines. 
+ for engine_name, packet in packet_dict.items(): + if 'enum' not in packet: + continue + + # Packets that have both PFP and MEG definitions and don't have ENGINE_SEL are parsed as PFP, + # so ignore MEG enums. + if engine_name == 'meg' and 'pfp' in packet_dict and not packet_has_engine_sel(packet_dict): + continue; + + enums = packet['enum'] if 'enum' in packet else {} + table = {} + + for field_name, values in enums.items(): + assert len(values) > 0 + + if field_name not in table: + table[field_name] = {} + + for value_name, value_item in values.items(): + value = value_item['value'] + + if value_name.startswith('reserved'): + continue + + if value in table[field_name]: + if table[field_name][value] != value_name: + print('// Enum conflict: Packet %s field %s has value %d = %s, but the table already has %d = %s' % + (packet_name, field_name, value, value_name.upper(), value, table[field_name][value].upper())) + else: + table[field_name][value] = value_name + + for field_name, values in table.items(): + print('') + print('static const char *%s_%s_%s[] = {' % (engine_name, packet_name, field_name)) + + for value, value_name in table[field_name].items(): + print(3 * ' ' + '[%d] = "%s",' % (value, value_name.upper())) + + print('};') + + +def print_packet(packet_name, packet_dict, engine_name, dword0_read): + if engine_name not in packet_dict: + print(9 * ' ' + 'fprintf(stderr, "amdgpu: packet %s is not supported by %s\\n");' % + (packet_name, engine_name.upper())) + print(9 * ' ' + 'assert(0 && "packet %s is not supported by %s");' % (packet_name, engine_name.upper())) + return + + packet = packet_dict[engine_name] + enums = packet['enum'] if 'enum' in packet else {} + words = packet['word'] + seen_variable_length_word = False + + # Some packets need dwords to be loaded first if the byte count is after the address words. 
+ load_dwords_first = packet_name == 'DMA_DATA' + + if load_dwords_first: + for word_index, word_variants in words.items(): + if int(word_index) == 1: + continue # it's the packet header. + + if int(word_index) == 2 and dword0_read: + continue + + # Don't load any variable-length fields here. + has_variable_length_field = False + for _, word_variant in word_variants.items(): + has_variable_length_field = has_variable_length_field or len([x for x in word_variant.keys() if '[]' in x]) > 0 + if has_variable_length_field: + continue + + word_index_0based = int(word_index) - 2 + + if len(word_variants) == 0: + print(9 * ' ' + 'if (%d <= pkt_count_field) ac_ib_get(ib);' % word_index_0based) + else: + print(9 * ' ' + 'uint32_t dw%d = %d <= pkt_count_field ? ac_ib_get(ib) : 0;' % (word_index_0based, word_index_0based)) + + # Print the dwords. + for word_index, word_variants in words.items(): + if int(word_index) == 1: + continue # it's the packet header. + + get_dword = (int(word_index) > 2 or not dword0_read) and not load_dwords_first + word_index_0based = int(word_index) - 2 + dword_var = 'dw%d' % word_index_0based + + # Parse the dword. + for word_variant_name, word_variant in word_variants.items(): + prefix = ('[%s]' % word_variant_name.upper()) if len(word_variants) > 1 else '' + num_printed_fields = len([field_name for field_name in word_variant.keys() + if not field_name.startswith('reserved') and not field_name.startswith('dummy')]) + + # If any field (it should be exactly one field) contains [], it's a variable-length packet. 
+ num_var_length_fields = len([x for x in word_variant.keys() if '[]' in x]) + if num_var_length_fields > 0: + assert num_var_length_fields == 1 + seen_variable_length_word = True + + if packet_name == 'WRITE_DATA': + assert word_index_0based == 3 + print(9 * ' ' + 'for (unsigned i = 0; i < pkt_count_field - 3; i++)') + print(12 * ' ' + 'ac_print_data_dword(ib->f, ac_ib_get(ib), "data");') + else: + assert False, 'unexpected variable-length packet: %s' % packet_name + continue + + assert not seen_variable_length_word + + # Get the next dword if needed. + if get_dword: + if word_index_0based > 0: + print('') + + if len(word_variant) == 0: + print(9 * ' ' + 'ac_ib_get(ib);') + else: + print(9 * ' ' + 'uint32_t %s = ac_ib_get(ib);' % dword_var) + + get_dword = False + + # Iterate over all fields. + for field_name, field in word_variant.items(): + # Get field bits. + first_bit, last_bit = get_field_bits(field) + num_bits = last_bit - first_bit + 1 + bitmask = (1 << num_bits) - 1 + + if field_name.startswith('reserved') or field_name.startswith('dummy'): + # If a word has multiple variants, a reserved field in one variant may be used by another variant, + # and we don't know which word variant is used, so ignore reserved fields. + if len(word_variants) == 1: + if num_bits == 32: + print(9 * ' ' + 'assert(!%s && "reserved packet fields should be 0 for %s, word %d");' % + (dword_var, packet_name, word_index_0based)) + else: + print(9 * ' ' + 'assert(!((%s >> %d) & 0x%x) && "reserved packet fields should be 0 for %s, word %d");' % + (dword_var, first_bit, bitmask, packet_name, word_index_0based)) + continue + + # Some address fields don't use the first 2-3 bits. Include them anyway. + if num_printed_fields == 1 and first_bit + num_bits == 32 and first_bit <= 8: + num_bits = 32 + + # Extract the field value if needed. 
+ if num_bits < 32: + field_var = '%s%s_%s' % (dword_var, '' if len(word_variants) == 1 else word_variant_name.upper(), field_name) + print(9 * ' ' + 'uint32_t %s = (%s >> %d) & 0x%x;' % (field_var, dword_var, first_bit, bitmask)) + else: + field_var = dword_var + + register_map_key = (field_name.upper(), first_bit) + + # Choose one of the methods of printing the field + if field_name in enums: + # Print it as an enum value string + enum_array = '%s_%s_%s' % (engine_name, packet_name, field_name) + value_name_var = '%s_str' % field_var + + print(9 * ' ' + 'const char *%s = %s < ARRAY_SIZE(%s) ?' % (value_name_var, field_var, enum_array)); + print(9 * ' ' + ' %s[%s] : NULL;' % (enum_array, field_var)) + print(9 * ' ' + 'assert(%s && "invalid/reserved values shouldn\'t be present");' % value_name_var) + print(9 * ' ' + 'ac_print_string_value(ib->f, "%s%s", %s);' % (prefix, field_name.upper(), value_name_var)) + elif register_map_key in packet_field_register_map: + # Print it as a register + reg_name, mask = packet_field_register_map[register_map_key] + print(9 * ' ' + 'ac_dump_reg(ib->f, ib->gfx_level, ib->family, %s, %s, %s);' % + (reg_name, field_var, hex(mask) if mask >= 0 else '~0')) + else: + # Print it as a regular value + print(9 * ' ' + 'ac_print_named_value(ib->f, "%s%s", %s, %d);' % + (prefix, field_name.upper(), field_var, num_bits)) + + # If the field is an address, invoke ac_ib_handle_address. + if field_name.upper() in address_field_map: + packet_list, addr_condition, count = address_field_map[field_name.upper()] + indent = 9 + + if len(packet_list) == 0 or packet_name in packet_list: + assert len(addr_condition) > 0 or len(word_variants) == 1 + + if len(addr_condition) > 0: + print(9 * ' ' + 'if (%s)' % addr_condition) + indent = 12 + + print(indent * ' ' + 'ac_ib_handle_address(ib, %s, %s, %s);' % + ('dw%d' % (word_index_0based - 1), dword_var, '0' if count == '' else count)) + + # Stop printing if that was the last word of the packet. 
+ if word_index_0based < len(words) - 2: + print(9 * ' ' + 'if (pkt_count_field == %d) break;' % word_index_0based) + + +def should_skip_packet(packet_name): + # TODO: This packet conflicts with INDIRECT_BUFFER (same opcode number), but we may need to handle it somehow + return packet_name == 'COND_INDIRECT_BUFFER' + + +def get_packet_dict(engines, packet_name): + # Get a dictionary of the packet definition where the engine name is the top-level key. + packet_dict = {} + + for engine_name, packets in engines.items(): + if packet_name in packets: + packet_dict[engine_name] = packets[packet_name] + + return packet_dict + + +def print_packet_parser(is_header): + gfx_version = 'gfx' + re_gfx_number.search(sys.argv[1]).group(1) + + # Load the packet file + engines = json.load(open(sys.argv[1], 'r', encoding='utf-8'))['pm4_packets'] + + # Load the opcode file + opcode_file = open(sys.argv[2], 'r', encoding='utf-8') + opcodes = {} + + for line in opcode_file: + match = re_opcode.match(line) + if match: + opcodes[match['name']] = int(match['hex'], 16) + + print( +"""/* This file is automatically generated. DO NOT EDIT. + * + * Copyright 2026 Advanced Micro Devices, Inc. + * SPDX-License-Identifier: MIT + */ +""") + + if is_header: + print('#ifndef AMD_CP_IB_PARSER_%s' % gfx_version.upper()) + print('#define AMD_CP_IB_PARSER_%s' % gfx_version.upper()) + print('') + print('#include "ac_debug.h"') + else: + print('#include "amd_cp_print_packet_%s.h"' % gfx_version) + print('#include "amd_cp_packets_%s.h"' % gfx_version) + print('#include "amdgfxregs.h"') + + # Generate enum-to-string tables. + if not is_header: + for packet_name, value in opcodes.items(): + if not should_skip_packet(packet_name) and packet_name not in no_printer_support: + print_enum_table(packet_name, get_packet_dict(engines, packet_name)) + + print('') + print('/* Print the packet and use assertions to validate its content. 
*/') + print('void') + print('amd_cp_print_packet_%s(struct ac_ib_parser *ib, unsigned opcode, unsigned pkt_count_field)%s' + % (gfx_version, ';' if is_header else '')) + + if is_header: + print('') + print('#endif') + return + + print('{') + print(3 * ' ' + 'switch (opcode) {') + + # Generate packet parser cases. + for packet_name, value in opcodes.items(): + skip_packet = should_skip_packet(packet_name) + if skip_packet: + print('#if 0') + + packet_dict = get_packet_dict(engines, packet_name) + print(3 * ' ' + 'case 0x%X: { /* PKT3_%s */' % (value, packet_name)) + + if packet_name in no_printer_support: + print(6 * ' ' + 'UNREACHABLE("the caller should handle %s");' % packet_name) + else: + has_engine_sel = packet_has_engine_sel(packet_dict) + + if has_engine_sel: + print(6 * ' ' + 'uint32_t dw0 = ac_ib_get(ib);') + print('') + + print(6 * ' ' + 'if (ib->ip_type == AMD_IP_COMPUTE) {') + + if has_engine_sel: + # Generate an expression that checks ENGINE_SEL + engine_sel_infix = ('%X_1%s' % + (opcodes[packet_name], '' if len(packet_dict['pfp']['word']['2']) == 1 else 'A')) + engine_sel_getter = 'G_%s_ENGINE_SEL' % engine_sel_infix + + if 'pfp' in packet_dict['pfp']['enum']['engine_sel']: + pfp_value_name = 'PFP' + elif 'prefetch_parser' in packet_dict['pfp']['enum']['engine_sel']: + pfp_value_name = 'PREFETCH_PARSER' + else: + assert False, 'ENGINE_SEL doesn''t contain PFP or PREFETCH_PARSER' + + pfp_value = 'V_%s_%s' % (engine_sel_infix, pfp_value_name) + + print_packet(packet_name, packet_dict, 'mec', True) + + # Parse both PFP and MEG packet variants. 
+ print(6 * ' ' + '} else if (%s(dw0) == %s) {' % (engine_sel_getter, pfp_value)) + print_packet(packet_name, packet_dict, 'pfp', True) + print(6 * ' ' + '} else {') + print_packet(packet_name, packet_dict, 'meg', True) + else: + print_packet(packet_name, packet_dict, 'mec', False) + print(6 * ' ' + '} else {') + print_packet(packet_name, packet_dict, 'pfp' if 'pfp' in packet_dict else 'meg', False) + + print(6 * ' ' + '}') + print(6 * ' ' + 'break;') + + print(3 * ' ' + '}') + if skip_packet: + print('#endif') + print('') + + print(3 * ' ' + 'default:') + print(6 * ' ' + 'fprintf(stderr, "amdgpu: cannot decode packet 0x%x\\n", opcode);') + print(6 * ' ' + 'break;') + + print(3 * ' ' + '}') + print('}') + + if __name__ == "__main__": - main() + last = sys.argv.pop() + + if last == 'packets_h': + print_packet_definitions() + elif last == 'print_c': + print_packet_parser(False) + elif last == 'print_h': + print_packet_parser(True) + else: + assert False, 'the last parameter must be "header" or "parser"' From e3c731690c7b8b0f021271065437a3b37333f4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 2 Mar 2026 16:44:09 -0500 Subject: [PATCH 3/4] ac: enable the new auto-generated CP packet parser This keeps old packets that were removed from newer HW, packets that set registers, and packets using non-trivial custom code. It preserves address checking that was done in print_addr. Packet names still used the old generator. 
--- src/amd/common/ac_parse_ib.c | 316 +++++++---------------------------- 1 file changed, 64 insertions(+), 252 deletions(-) diff --git a/src/amd/common/ac_parse_ib.c b/src/amd/common/ac_parse_ib.c index 320db7cb1b3..b71a93e08ee 100644 --- a/src/amd/common/ac_parse_ib.c +++ b/src/amd/common/ac_parse_ib.c @@ -5,6 +5,8 @@ */ #include "ac_debug.h" +#include "amd_cp_print_packet_gfx11.h" +#include "amd_cp_print_packet_gfx12.h" #include "sid.h" #include "sid_tables.h" #include "ac_vcn.h" @@ -242,9 +244,41 @@ static void ac_parse_set_reg_pairs_packed_packet(FILE *f, unsigned count, unsign #define AC_ADDR_SIZE_NOT_MEMORY 0xFFFFFFFF +static const char *check_address(struct ac_ib_parser *ib, uint64_t addr, uint32_t size) +{ + if (ib->addr_callback && size != AC_ADDR_SIZE_NOT_MEMORY) { + struct ac_addr_info addr_info; + ib->addr_callback(ib->addr_callback_data, addr, &addr_info); + + struct ac_addr_info addr_info2 = addr_info; + if (size) + ib->addr_callback(ib->addr_callback_data, addr + size - 1, &addr_info2); + + uint32_t invalid_count = !addr_info.valid + !addr_info2.valid; + + if (addr_info.use_after_free && addr_info2.use_after_free) + return " used after free"; + else if (invalid_count == 2) + return " invalid"; + else if (invalid_count == 1) + return " out of bounds"; + } + + return NULL; +} + void ac_ib_handle_address(struct ac_ib_parser *ib, uint32_t addr_lo, uint32_t addr_hi, uint32_t size) { - /* stub */ + uint64_t addr = addr_lo | ((uint64_t)addr_hi << 32); + const char *addr_message = check_address(ib, addr, size); + + print_spaces(ib->f, INDENT_PKT); + fprintf(ib->f, "%s(FULL ADDRESS)%s <- 0x%"PRIx64, O_COLOR_YELLOW, O_COLOR_RESET, addr); + + if (addr_message) + fprintf(ib->f, "%s", addr_message); + + fprintf(ib->f, "\n"); } static void print_addr(struct ac_ib_parser *ib, const char *name, uint64_t addr, uint32_t size) @@ -258,27 +292,21 @@ static void print_addr(struct ac_ib_parser *ib, const char *name, uint64_t addr, fprintf(f, "0x%llx", (unsigned long 
long)addr); - if (ib->addr_callback && size != AC_ADDR_SIZE_NOT_MEMORY) { - struct ac_addr_info addr_info; - ib->addr_callback(ib->addr_callback_data, addr, &addr_info); - - struct ac_addr_info addr_info2 = addr_info; - if (size) - ib->addr_callback(ib->addr_callback_data, addr + size - 1, &addr_info2); - - uint32_t invalid_count = !addr_info.valid + !addr_info2.valid; - - if (addr_info.use_after_free && addr_info2.use_after_free) - fprintf(f, " used after free"); - else if (invalid_count == 2) - fprintf(f, " invalid"); - else if (invalid_count == 1) - fprintf(f, " out of bounds"); - } + const char *addr_message = check_address(ib, addr, size); + if (addr_message) + fprintf(f, "%s", addr_message); fprintf(f, "\n"); } +static void ac_cp_print_packet_generated(struct ac_ib_parser *ib, unsigned opcode, unsigned count) +{ + if (ib->gfx_level >= GFX12) + amd_cp_print_packet_gfx12(ib, opcode, count); + else + amd_cp_print_packet_gfx11(ib, opcode, count); +} + static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, int *current_trace_id) { @@ -289,7 +317,6 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, const char *predicated = PKT3_PREDICATE(header) ? "(predicated)" : ""; const char *reset_filter_cam = PKT3_RESET_FILTER_CAM_G(header) ? "(reset_filter_cam)" : ""; int i; - unsigned tmp; /* Print the name first. 
*/ for (i = 0; i < ARRAY_SIZE(packet3_table); i++) @@ -351,39 +378,8 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, case PKT3_SET_SH_REG_PAIRS_PACKED_N: ac_parse_set_reg_pairs_packed_packet(f, count, SI_SH_REG_OFFSET, ib); break; - case PKT3_ACQUIRE_MEM: - if (ib->gfx_level >= GFX11) { - if (G_585_PWS_ENA(ib->ib[ib->cur_dw + 5])) { - ac_dump_reg(f, ib->gfx_level, ib->family, R_580_ACQUIRE_MEM_PWS_2, ac_ib_get(ib), ~0); - print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32); - print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25); - print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32); - print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32); - ac_dump_reg(f, ib->gfx_level, ib->family, R_585_ACQUIRE_MEM_PWS_7, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0); - } else { - print_string_value(f, "ENGINE_SEL", ac_ib_get(ib) & 0x80000000 ? "ME" : "PFP"); - print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32); - print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25); - print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32); - print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32); - print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); - ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0); - } - } else { - tmp = ac_ib_get(ib); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, tmp, 0x7fffffff); - print_string_value(f, "ENGINE_SEL", tmp & 0x80000000 ? 
"ME" : "PFP"); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0); - print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); - if (ib->gfx_level >= GFX10) - ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0); - } - break; case PKT3_SURFACE_SYNC: + /* GFX6-8 */ if (ib->gfx_level >= GFX7) { ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0); @@ -395,18 +391,8 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, } print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); break; - case PKT3_EVENT_WRITE: { - uint32_t event_dw = ac_ib_get(ib); - ac_dump_reg(f, ib->gfx_level, ib->family, R_028A90_VGT_EVENT_INITIATOR, event_dw, - S_028A90_EVENT_TYPE(~0)); - print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4); - print_named_value(f, "INV_L2", (event_dw >> 20) & 0x1, 1); - if (count > 0) - print_addr(ib, "ADDR", ac_ib_get64(ib), 0); - - break; - } case PKT3_EVENT_WRITE_EOP: { + /* GFX6-8 */ uint32_t event_dw = ac_ib_get(ib); ac_dump_reg(f, ib->gfx_level, ib->family, R_028A90_VGT_EVENT_INITIATOR, event_dw, S_028A90_EVENT_TYPE(~0)); @@ -440,10 +426,13 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, break; } case PKT3_RELEASE_MEM: { - uint32_t event_dw = ac_ib_get(ib); if (ib->gfx_level >= GFX10) { - ac_dump_reg(f, ib->gfx_level, ib->family, R_490_RELEASE_MEM_OP, event_dw, ~0u); - } else { + ac_cp_print_packet_generated(ib, op, count); + break; + } + /* GFX6-9 */ + uint32_t event_dw = ac_ib_get(ib); + { ac_dump_reg(f, ib->gfx_level, ib->family, 
R_028A90_VGT_EVENT_INITIATOR, event_dw, S_028A90_EVENT_TYPE(~0)); print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4); @@ -467,67 +456,12 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, print_named_value(f, "CTXID", ac_ib_get(ib), 32); break; } - case PKT3_WAIT_REG_MEM: - print_named_value(f, "OP", ac_ib_get(ib), 32); - print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32); - print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32); - print_named_value(f, "REF", ac_ib_get(ib), 32); - print_named_value(f, "MASK", ac_ib_get(ib), 32); - print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); - break; - case PKT3_DRAW_INDEX_AUTO: - ac_dump_reg(f, ib->gfx_level, ib->family, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0); - break; - case PKT3_DRAW_INDEX_2: - ac_dump_reg(f, ib->gfx_level, ib->family, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0); - print_addr(ib, "INDEX_ADDR", ac_ib_get64(ib), 0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0); - break; - case PKT3_DRAW_INDIRECT: - case PKT3_DRAW_INDEX_INDIRECT: - print_named_value(f, "OFFSET", ac_ib_get(ib), 32); - print_named_value(f, "VERTEX_OFFSET_REG", ac_ib_get(ib), 32); - print_named_value(f, "START_INSTANCE_REG", ac_ib_get(ib), 32); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0); - break; - case PKT3_DRAW_INDIRECT_MULTI: - case PKT3_DRAW_INDEX_INDIRECT_MULTI: - print_named_value(f, "OFFSET", ac_ib_get(ib), 32); - print_named_value(f, "VERTEX_OFFSET_REG", ac_ib_get(ib), 32); - print_named_value(f, "START_INSTANCE_REG", ac_ib_get(ib), 32); - tmp = ac_ib_get(ib); - print_named_value(f, "DRAW_ID_REG", tmp & 0xFFFF, 16); - print_named_value(f, "DRAW_ID_ENABLE", tmp >> 31, 1); - print_named_value(f, 
"COUNT_INDIRECT_ENABLE", (tmp >> 30) & 1, 1); - print_named_value(f, "DRAW_COUNT", ac_ib_get(ib), 32); - print_addr(ib, "COUNT_ADDR", ac_ib_get64(ib), 0); - print_named_value(f, "STRIDE", ac_ib_get(ib), 32); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0); - break; - case PKT3_INDEX_BASE: - print_addr(ib, "ADDR", ac_ib_get64(ib), 0); - break; case PKT3_INDEX_TYPE: + /* GFX6-8 */ ac_dump_reg(f, ib->gfx_level, ib->family, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0); break; - case PKT3_NUM_INSTANCES: - ac_dump_reg(f, ib->gfx_level, ib->family, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0); - break; - case PKT3_WRITE_DATA: { - uint32_t control = ac_ib_get(ib); - ac_dump_reg(f, ib->gfx_level, ib->family, R_370_CONTROL, control, ~0); - uint32_t dst_sel = G_370_DST_SEL(control); - uint64_t addr = ac_ib_get64(ib); - uint32_t dword_count = first_dw + count + 1 - ib->cur_dw; - bool writes_memory = dst_sel == V_370_MEM_GRBM || dst_sel == V_370_TC_L2 || dst_sel == V_370_MEM; - print_addr(ib, "DST_ADDR", addr, writes_memory ? 
dword_count * 4 : AC_ADDR_SIZE_NOT_MEMORY); - for (uint32_t i = 0; i < dword_count; i++) - print_data_dword(f, ac_ib_get(ib), "data"); - break; - } case PKT3_CP_DMA: + /* GFX6 */ ac_dump_reg(f, ib->gfx_level, ib->family, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->gfx_level, ib->family, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->gfx_level, ib->family, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0); @@ -535,6 +469,11 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, ac_dump_reg(f, ib->gfx_level, ib->family, R_415_COMMAND, ac_ib_get(ib), ~0); break; case PKT3_DMA_DATA: { + if (ib->gfx_level >= GFX9) { + ac_cp_print_packet_generated(ib, op, count); + break; + } + /* GFX7-8 */ uint32_t header = ac_ib_get(ib); ac_dump_reg(f, ib->gfx_level, ib->family, R_501_DMA_DATA_WORD0, header, ~0); @@ -542,8 +481,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, uint64_t dst_addr = ac_ib_get64(ib); uint32_t command = ac_ib_get(ib); - uint32_t size = ib->gfx_level >= GFX9 ? 
G_415_BYTE_COUNT_GFX9(command) - : G_415_BYTE_COUNT_GFX6(command); + uint32_t size = G_415_BYTE_COUNT_GFX6(command); uint32_t src_sel = G_501_SRC_SEL(header); bool src_mem = (src_sel == V_501_SRC_ADDR && G_415_SAS(command) == V_415_MEMORY) || @@ -559,7 +497,6 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, break; } case PKT3_INDIRECT_BUFFER_SI: - case PKT3_INDIRECT_BUFFER_CONST: case PKT3_INDIRECT_BUFFER: { uint32_t base_lo_dw = ac_ib_get(ib); ac_dump_reg(f, ib->gfx_level, ib->family, R_3F0_IB_BASE_LO, base_lo_dw, ~0); @@ -604,11 +541,6 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, fprintf(f, "\n\035<------------------- nested end -------------------\n"); break; } - case PKT3_CLEAR_STATE: - case PKT3_INCREMENT_DE_COUNTER: - case PKT3_PFP_SYNC_ME: - print_data_dword(f, ac_ib_get(ib), "reserved"); - break; case PKT3_NOP: if (header == PKT3_NOP_PAD) { count = -1; /* One dword NOP. */ @@ -645,128 +577,8 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, print_data_dword(f, ac_ib_get(ib), "unused"); } break; - case PKT3_DISPATCH_DIRECT: - case PKT3_DISPATCH_DIRECT_INTERLEAVED: - ac_dump_reg(f, ib->gfx_level, ib->family, R_00B804_COMPUTE_DIM_X, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_00B808_COMPUTE_DIM_Y, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_00B80C_COMPUTE_DIM_Z, ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR, - ac_ib_get(ib), ~0); - break; - case PKT3_DISPATCH_INDIRECT: - case PKT3_DISPATCH_INDIRECT_INTERLEAVED: - if (count > 1) - print_addr(ib, "ADDR", ac_ib_get64(ib), 12); - else - print_named_value(f, "DATA_OFFSET", ac_ib_get(ib), 32); - - ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR, - ac_ib_get(ib), ~0); - break; - case PKT3_SET_BASE: - tmp = ac_ib_get(ib); - print_string_value(f, "BASE_INDEX", tmp == 1 ? 
"INDIRECT_BASE" : COLOR_RED "UNKNOWN" COLOR_RESET); - print_addr(ib, "ADDR", ac_ib_get64(ib), 0); - break; - case PKT3_PRIME_UTCL2: - tmp = ac_ib_get(ib); - print_named_value(f, "CACHE_PERM[rwx]", tmp & 0x7, 3); - print_string_value(f, "PRIME_MODE", tmp & 0x8 ? "WAIT_FOR_XACK" : "DONT_WAIT_FOR_XACK"); - print_named_value(f, "ENGINE_SEL", tmp >> 30, 2); - print_addr(ib, "ADDR", ac_ib_get64(ib), 0); - print_named_value(f, "REQUESTED_PAGES", ac_ib_get(ib), 14); - break; - case PKT3_ATOMIC_MEM: - tmp = ac_ib_get(ib); - print_named_value(f, "ATOMIC", tmp & 0x7f, 7); - print_named_value(f, "COMMAND", (tmp >> 8) & 0xf, 4); - print_named_value(f, "CACHE_POLICY", (tmp >> 25) & 0x3, 2); - print_named_value(f, "ENGINE_SEL", tmp >> 30, 2); - print_addr(ib, "ADDR", ac_ib_get64(ib), 8); - print_named_value(f, "SRC_DATA_LO", ac_ib_get(ib), 32); - print_named_value(f, "SRC_DATA_HI", ac_ib_get(ib), 32); - print_named_value(f, "CMP_DATA_LO", ac_ib_get(ib), 32); - print_named_value(f, "CMP_DATA_HI", ac_ib_get(ib), 32); - print_named_value(f, "LOOP_INTERVAL", ac_ib_get(ib) & 0x1fff, 13); - break; - case PKT3_INDEX_BUFFER_SIZE: - print_named_value(f, "COUNT", ac_ib_get(ib), 32); - break; - case PKT3_COND_EXEC: { - uint32_t size = ac_ib_get(ib) * 4; - print_addr(ib, "ADDR", ac_ib_get64(ib), size); - print_named_value(f, "SIZE", size, 32); - break; - } - case PKT3_DISPATCH_TASKMESH_GFX: - tmp = ac_ib_get(ib); - print_named_value(f, "RING_ENTRY_REG", (tmp >> 16) & 0xffff, 16); - print_named_value(f, "XYZ_DIM_REG", (tmp & 0xffff), 16); - tmp = ac_ib_get(ib); - print_named_value(f, "THREAD_TRACE_MARKER_ENABLE", (tmp >> 31) & 0x1, 1); - if (ib->gfx_level >= GFX11) { - print_named_value(f, "XYZ_DIM_ENABLE", (tmp >> 30) & 0x1, 1); - print_named_value(f, "MODE1_ENABLE", (tmp >> 29) & 0x1, 1); - print_named_value(f, "LINEAR_DISPATCH_ENABLED", (tmp >> 28) & 0x1, 1); - } - print_named_value(f, "DI_SRC_SEL_AUTO_INDEX", ac_ib_get(ib), ~0); - break; - case PKT3_DISPATCH_TASKMESH_DIRECT_ACE: - 
print_named_value(f, "X_DIM", ac_ib_get(ib), ~0); - print_named_value(f, "Y_DIM", ac_ib_get(ib), ~0); - print_named_value(f, "Z_DIM", ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR, - ac_ib_get(ib), ~0); - print_named_value(f, "RING_ENTRY_REG", ac_ib_get(ib), 16); - break; - case PKT3_DISPATCH_MESH_DIRECT: - print_named_value(f, "X_DIM", ac_ib_get(ib), ~0); - print_named_value(f, "Y_DIM", ac_ib_get(ib), ~0); - print_named_value(f, "Z_DIM", ac_ib_get(ib), ~0); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, - ac_ib_get(ib), ~0); - break; - case PKT3_DISPATCH_MESH_INDIRECT_MULTI: - print_named_value(f, "DATA_OFFSET", ac_ib_get(ib), 32); - tmp = ac_ib_get(ib); - print_named_value(f, "DRAW_INDEX_LOC", (tmp >> 16) & 0xffff, 16); - print_named_value(f, "XYZ_DIM_LOC", tmp & 0xffff, 16); - tmp = ac_ib_get(ib); - print_named_value(f, "DRAW_INDEX_ENABLE", tmp >> 31, 1); - print_named_value(f, "COUNT_INDIRECT_ENABLE", (tmp >> 30) & 1, 1); - print_named_value(f, "THREAD_TRACE_MARKER_ENABLE", (tmp >> 29) & 1, 1); - if (ib->gfx_level >= GFX11) { - print_named_value(f, "XYZ_DIM_ENABLE", (tmp >> 28) & 1, 1); - print_named_value(f, "MODE1_ENABLE", (tmp >> 27) & 1, 1); - } else { - print_named_value(f, "USE_VGPRS", (tmp >> 28) & 1, 1); - } - print_named_value(f, "COUNT", ac_ib_get(ib), 32); - print_addr(ib, "COUNT_ADDR", ac_ib_get64(ib), 0); - print_named_value(f, "STRIDE", ac_ib_get(ib), 32); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, - ac_ib_get(ib), ~0); - break; - case PKT3_DISPATCH_TASK_STATE_INIT: - print_addr(ib, "CONTROL_BUF_ADDR", ac_ib_get64(ib), 0); - break; - case PKT3_DISPATCH_TASKMESH_INDIRECT_MULTI_ACE: - print_addr(ib, "DATA_ADDR", ac_ib_get64(ib), 0); - tmp = ac_ib_get(ib); - print_named_value(f, "RING_ENTRY_LOC", tmp & 0xffff, 16); - tmp = ac_ib_get(ib); - print_named_value(f, "DRAW_INDEX_LOC", (tmp >> 16) & 0xffff, 16); - print_named_value(f, 
"XYZ_DIM_ENABLE", (tmp >> 3) & 1, 1); - print_named_value(f, "DRAW_INDEX_ENABLE", (tmp >> 2), 1); - print_named_value(f, "COUNT_INDIRECT_ENABLE", (tmp >> 1) & 1, 1); - print_named_value(f, "THREAD_TRACE_MARKER_ENABLE", tmp & 1, 1); - tmp = ac_ib_get(ib); - print_named_value(f, "XYZ_DIM_LOC", tmp & 0xffff, 16); - print_named_value(f, "COUNT", ac_ib_get(ib), 32); - print_addr(ib, "COUNT_ADDR", ac_ib_get64(ib), 0); - print_named_value(f, "STRIDE", ac_ib_get(ib), 32); - ac_dump_reg(f, ib->gfx_level, ib->family, R_0287F0_VGT_DRAW_INITIATOR, - ac_ib_get(ib), ~0); + default: + ac_cp_print_packet_generated(ib, op, count); break; } From 707c53d5349ba44e83f98dca043aa56809f6e2b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 2 Mar 2026 17:20:23 -0500 Subject: [PATCH 4/4] ac: replace some packet field definitions in sid.h by generated ones --- src/amd/common/ac_cmdbuf_cp.c | 4 +- src/amd/common/ac_shadowed_regs.c | 22 +++---- src/amd/common/ac_sqtt.c | 2 +- src/amd/common/sid.h | 62 ++++--------------- src/amd/vulkan/radv_cmd_buffer.c | 6 +- src/amd/vulkan/radv_dgc.c | 8 +-- src/amd/vulkan/radv_queue.c | 6 +- .../drivers/radeonsi/si_cp_reg_shadowing.c | 14 ++--- src/gallium/drivers/radeonsi/si_query.c | 8 +-- src/gallium/drivers/radeonsi/si_sqtt.c | 8 +-- src/gallium/drivers/radeonsi/si_state.c | 36 +++++------ .../drivers/radeonsi/si_state_draw.cpp | 4 +- src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp | 10 +-- src/gallium/winsys/amdgpu/drm/amdgpu_userq.c | 4 +- 14 files changed, 78 insertions(+), 116 deletions(-) diff --git a/src/amd/common/ac_cmdbuf_cp.c b/src/amd/common/ac_cmdbuf_cp.c index 4ba512b7952..a4a9340085d 100644 --- a/src/amd/common/ac_cmdbuf_cp.c +++ b/src/amd/common/ac_cmdbuf_cp.c @@ -538,8 +538,8 @@ ac_emit_cp_atomic_mem(struct ac_cmdbuf *cs, uint32_t atomic_op, { ac_cmdbuf_begin(cs); ac_cmdbuf_emit(PKT3(PKT3_ATOMIC_MEM, 7, 0)); - ac_cmdbuf_emit(ATOMIC_OP(atomic_op) | - ATOMIC_COMMAND(atomic_cmd)); + 
ac_cmdbuf_emit(S_1E_1_ATOMIC(atomic_op) | + S_1E_1_COMMAND(atomic_cmd)); ac_cmdbuf_emit(va); /* addr lo */ ac_cmdbuf_emit(va >> 32); /* addr hi */ ac_cmdbuf_emit(data); /* data lo */ diff --git a/src/amd/common/ac_shadowed_regs.c b/src/amd/common/ac_shadowed_regs.c index 435b497e965..a94ff4e8834 100644 --- a/src/amd/common/ac_shadowed_regs.c +++ b/src/amd/common/ac_shadowed_regs.c @@ -3069,18 +3069,18 @@ struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *i ac_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); ac_pm4_cmd_add(pm4, - CC0_UPDATE_LOAD_ENABLES(1) | - CC0_LOAD_PER_CONTEXT_STATE(1) | - CC0_LOAD_CS_SH_REGS(1) | - CC0_LOAD_GFX_SH_REGS(1) | - CC0_LOAD_GLOBAL_UCONFIG(1)); + S_28_1_UPDATE_LOAD_ENABLES(1) | + S_28_1_LOAD_PER_CONTEXT_STATE(1) | + S_28_1_LOAD_CS_SH_REGS(1) | + S_28_1_LOAD_GFX_SH_REGS(1) | + S_28_1_LOAD_GLOBAL_UCONFIG(1)); ac_pm4_cmd_add(pm4, - CC1_UPDATE_SHADOW_ENABLES(1) | - CC1_SHADOW_PER_CONTEXT_STATE(1) | - CC1_SHADOW_CS_SH_REGS(1) | - CC1_SHADOW_GFX_SH_REGS(1) | - CC1_SHADOW_GLOBAL_UCONFIG(1) | - CC1_SHADOW_GLOBAL_CONFIG(1)); + S_28_2_UPDATE_SHADOW_ENABLES(1) | + S_28_2_SHADOW_PER_CONTEXT_STATE(1) | + S_28_2_SHADOW_CS_SH_REGS(1) | + S_28_2_SHADOW_GFX_SH_REGS(1) | + S_28_2_SHADOW_GLOBAL_UCONFIG(1) | + S_28_2_SHADOW_GLOBAL_CONFIG(1)); for (unsigned i = 0; i < SI_NUM_REG_RANGES; i++) ac_build_load_reg(info, pm4, i, gpu_address); diff --git a/src/amd/common/ac_sqtt.c b/src/amd/common/ac_sqtt.c index 02bea38f819..f3d4130087c 100644 --- a/src/amd/common/ac_sqtt.c +++ b/src/amd/common/ac_sqtt.c @@ -594,7 +594,7 @@ ac_sqtt_copy_info_regs(const struct radeon_info *info, struct ac_pm4_state *pm4, uint32_t init_wptr_value = shifted_data_va & 0x1fffffff; ac_pm4_cmd_add(pm4, PKT3(PKT3_ATOMIC_MEM, 7, 0)); - ac_pm4_cmd_add(pm4, ATOMIC_OP(TC_OP_ATOMIC_SUB_RTN_32)); + ac_pm4_cmd_add(pm4, S_1E_1_ATOMIC(V_1E_1_GL2_OP_ATOMIC_SUB_RTN_32)); ac_pm4_cmd_add(pm4, info_va); /* addr lo */ ac_pm4_cmd_add(pm4, info_va >> 32); /* addr hi */ 
ac_pm4_cmd_add(pm4, init_wptr_value); /* data lo */ diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h index c5fab6d1761..73bf0c49925 100644 --- a/src/amd/common/sid.h +++ b/src/amd/common/sid.h @@ -36,10 +36,6 @@ #define SI_SHADOWED_REG_BUFFER_SIZE \ (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + SI_UCONFIG_REG_SPACE_SIZE) -/* All registers defined in this packet section don't exist and the only - * purpose of these definitions is to define packet encoding that - * the IB parser understands, and also to have an accurate documentation. - */ #define PKT3_NOP 0x10 #define PKT3_SET_BASE 0x11 #define PKT3_CLEAR_STATE 0x12 @@ -47,58 +43,27 @@ #define PKT3_DISPATCH_DIRECT 0x15 #define PKT3_DISPATCH_INDIRECT 0x16 #define PKT3_ATOMIC_MEM 0x1E -#define ATOMIC_OP(x) ((unsigned)((x)&0x7f) << 0) -#define TC_OP_ATOMIC_SUB_RTN_32 16 -#define TC_OP_ATOMIC_SUB_RTN_64 48 -#define TC_OP_ATOMIC_CMPSWAP_32 72 -#define TC_OP_ATOMIC_SUB_64 112 -#define TC_OP_ATOMIC_XOR_64 119 -#define ATOMIC_COMMAND(x) ((unsigned)((x)&0x3) << 8) -#define ATOMIC_COMMAND_SEND_RTN 0x0 /* only RTN opcodes */ -#define ATOMIC_COMMAND_LOOP 0x1 /* only RTN opcodes */ -#define ATOMIC_COMMAND_WR_CONFIRM 0x2 /* only non-RTN opcodes */ -#define ATOMIC_COMMAND_SEND_NO_RTN 0x3 /* only non-RTN opcodes */ -#define ATOMIC_ENGINE_PFP (1 << 30) #define PKT3_OCCLUSION_QUERY 0x1F /* GFX7+ */ #define PKT3_SET_PREDICATION 0x20 -#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8) -#define PREDICATION_DRAW_VISIBLE (1 << 8) -#define PREDICATION_HINT_WAIT (0 << 12) -#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12) -#define PRED_OP(x) ((x) << 16) -#define PREDICATION_OP_CLEAR 0x0 -#define PREDICATION_OP_ZPASS 0x1 -#define PREDICATION_OP_PRIMCOUNT 0x2 -#define PREDICATION_OP_BOOL64 0x3 -#define PREDICATION_OP_BOOL32 0x4 -#define PREDICATION_CONTINUE (1 << 31) +#define PREDICATION_DRAW_NOT_VISIBLE S_20_1_PRED_BOOL(V_20_1_DRAW_IF_NOT_VISIBLE_OR_OVERFLOW) +#define PREDICATION_DRAW_VISIBLE 
S_20_1_PRED_BOOL(V_20_1_DRAW_IF_VISIBLE_OR_NO_OVERFLOW) +#define PREDICATION_HINT_WAIT S_20_1_HINT(V_20_1_WAIT_UNTIL_FINAL_ZPASS_WRITTEN) +#define PREDICATION_HINT_NOWAIT_DRAW S_20_1_HINT(V_20_1_DRAW_IF_NOT_FINAL_ZPASS_WRITTEN) +#define PREDICATION_OP_CLEAR V_20_1_CLEAR_PREDICATE +#define PREDICATION_OP_ZPASS V_20_1_SET_ZPASS_PREDICATE +#define PREDICATION_OP_PRIMCOUNT V_20_1_SET_PRIMCOUNT_PREDICATE +#define PREDICATION_OP_BOOL64 V_20_1_DX12 +#define PREDICATION_OP_BOOL32 V_20_1_VULKAN +#define PREDICATION_CONTINUE S_20_1_CONTINUE_BIT(V_20_1_CONTINUE_SET_PREDICATION) #define PKT3_COND_EXEC 0x22 -#define COND_EXEC_USERQ_OVERRULE_CMD (1 << 31) #define PKT3_PRED_EXEC 0x23 #define PKT3_DRAW_INDIRECT 0x24 #define PKT3_DRAW_INDEX_INDIRECT 0x25 #define PKT3_INDEX_BASE 0x26 #define PKT3_DRAW_INDEX_2 0x27 #define PKT3_CONTEXT_CONTROL 0x28 -#define CC0_LOAD_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0) -#define CC0_LOAD_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1) -#define CC0_LOAD_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15) -#define CC0_LOAD_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16) -#define CC0_LOAD_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24) -#define CC0_LOAD_CE_RAM(x) (((unsigned)(x)&0x1) << 28) -#define CC0_UPDATE_LOAD_ENABLES(x) (((unsigned)(x)&0x1) << 31) -#define CC1_SHADOW_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0) -#define CC1_SHADOW_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1) -#define CC1_SHADOW_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15) -#define CC1_SHADOW_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16) -#define CC1_SHADOW_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24) -#define CC1_UPDATE_SHADOW_ENABLES(x) (((unsigned)(x)&0x1) << 31) #define PKT3_INDEX_TYPE 0x2A /* GFX6-8 */ #define PKT3_DRAW_INDIRECT_MULTI 0x2C -#define R_2C3_DRAW_INDEX_LOC 0x2C3 -#define S_2C3_THREAD_TRACE_MARKER_ENABLE(x) (((unsigned)(x)&0x1) << 29) -#define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x)&0x1) << 30) -#define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x)&0x1) << 31) #define 
PKT3_DRAW_INDEX_AUTO 0x2D #define PKT3_DRAW_INDEX_IMMD 0x2E /* GFX6 only */ #define PKT3_NUM_INSTANCES 0x2F @@ -130,9 +95,6 @@ #define WAIT_REG_MEM_PFP (1 << 8) #define PKT3_MEM_WRITE 0x3D /* GFX6 only */ #define PKT3_INDIRECT_BUFFER 0x3F /* GFX6+ */ -#define S_3F3_INHERIT_VMID_MQD_GFX(x) (((unsigned)(x)&0x1) << 22) /* userqueue only */ -#define S_3F3_VALID_COMPUTE(x) (((unsigned)(x)&0x1) << 23) /* userqueue only */ -#define S_3F3_INHERIT_VMID_MQD_COMPUTE(x) (((unsigned)(x)&0x1) << 30) /* userqueue only */ #define PKT3_COPY_DATA 0x40 #define COPY_DATA_SRC_SEL(x) ((x)&0xf) #define COPY_DATA_REG 0 @@ -164,7 +126,7 @@ #define PKT3_ME_INITIALIZE 0x44 /* GFX6 only */ #define PKT3_COND_WRITE 0x45 #define PKT3_EVENT_WRITE 0x46 -#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_TYPE(x) S_46_1_EVENT_TYPE(x) /* 0 - any non-TS event * 1 - ZPASS_DONE * 2 - SAMPLE_PIPELINESTAT @@ -172,7 +134,7 @@ * 4 - *S_PARTIAL_FLUSH * 5 - TS events */ -#define EVENT_INDEX(x) ((x) << 8) +#define EVENT_INDEX(x) S_46_1_EVENT_INDEX(x) #define PIXEL_PIPE_STATE_CNTL_COUNTER_ID(x) ((x) << 3) #define PIXEL_PIPE_STATE_CNTL_STRIDE(x) ((x) << 9) /* 0 - 32 bits diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c55244bb6e4..40c97feb890 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -10563,8 +10563,8 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool index radeon_emit(0); radeon_emit(vertex_offset_reg); radeon_emit(start_instance_reg); - radeon_emit(draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C3_COUNT_INDIRECT_ENABLE(!!count_va) | - S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en)); + radeon_emit(draw_id_reg | S_2C_4_DRAW_INDEX_ENABLE(draw_id_enable) | S_2C_4_COUNT_INDIRECT_ENABLE(!!count_va) | + S_2C_4_THREAD_TRACE_MARKER_ENABLE(sqtt_en)); radeon_emit(draw_count); /* count */ radeon_emit(count_va); /* count_addr */ radeon_emit(count_va >> 32); @@ -15008,7 +15008,7 @@ 
radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi if (va) { assert(pred_op == PREDICATION_OP_BOOL32 || pred_op == PREDICATION_OP_BOOL64); - op = PRED_OP(pred_op); + op = S_20_1_PRED_OP(pred_op); /* PREDICATION_DRAW_VISIBLE means that if the 32-bit value is zero, all * rendering commands are discarded. Otherwise, they are discarded if diff --git a/src/amd/vulkan/radv_dgc.c b/src/amd/vulkan/radv_dgc.c index eb22a5f870f..690f606c25b 100644 --- a/src/amd/vulkan/radv_dgc.c +++ b/src/amd/vulkan/radv_dgc.c @@ -1410,8 +1410,8 @@ dgc_emit_pkt3_draw_indirect(struct dgc_cmdbuf *cs, nir_def *has_drawid, bool ind dgc_cs_emit_imm(0); dgc_cs_emit(vertex_offset_reg); dgc_cs_emit(nir_bcsel(b, has_baseinstance, start_instance_reg, nir_imm_int(b, 0))); - dgc_cs_emit(nir_ior_imm(b, nir_ior(b, draw_id_reg, nir_imm_int(b, S_2C3_DRAW_INDEX_ENABLE(1))), - S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en))); + dgc_cs_emit(nir_ior_imm(b, nir_ior(b, draw_id_reg, nir_imm_int(b, S_2C_4_DRAW_INDEX_ENABLE(1))), + S_2C_4_THREAD_TRACE_MARKER_ENABLE(sqtt_en))); dgc_cs_emit_imm(1); /* draw count */ dgc_cs_emit_imm(0); /* count va low */ dgc_cs_emit_imm(0); /* count va high */ @@ -1542,7 +1542,7 @@ dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *s nir_def *start_instance_reg = nir_bcsel(b, has_baseinstance, nir_iadd(b, vertex_offset_reg, start_instance_offset), nir_imm_int(b, 0)); nir_def *draw_id_reg = nir_bcsel( - b, has_drawid, nir_ior_imm(b, nir_iadd(b, vertex_offset_reg, nir_imm_int(b, 1)), S_2C3_DRAW_INDEX_ENABLE(1)), + b, has_drawid, nir_ior_imm(b, nir_iadd(b, vertex_offset_reg, nir_imm_int(b, 1)), S_2C_4_DRAW_INDEX_ENABLE(1)), nir_imm_int(b, 0)); nir_def *di_src_sel = nir_imm_int(b, indexed ? 
V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX); @@ -1555,7 +1555,7 @@ dgc_emit_draw_with_count(struct dgc_cmdbuf *cs, nir_def *stream_addr, nir_def *s dgc_cs_emit_imm(0); dgc_cs_emit(vertex_offset_reg); dgc_cs_emit(start_instance_reg); - dgc_cs_emit(nir_ior_imm(b, draw_id_reg, S_2C3_THREAD_TRACE_MARKER_ENABLE(sqtt_en))); + dgc_cs_emit(nir_ior_imm(b, draw_id_reg, S_2C_4_THREAD_TRACE_MARKER_ENABLE(sqtt_en))); dgc_cs_emit(draw_count); dgc_cs_emit_imm(0); dgc_cs_emit_imm(0); diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index 01692c90f7a..0c08ae1e001 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -686,8 +686,8 @@ radv_emit_graphics(struct radv_device *device, struct radv_cmd_stream *cs) if (!device->uses_shadow_regs) { ac_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - ac_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)); - ac_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1)); + ac_pm4_cmd_add(pm4, S_28_1_UPDATE_LOAD_ENABLES(1)); + ac_pm4_cmd_add(pm4, S_28_2_UPDATE_SHADOW_ENABLES(1)); if (has_clear_state) { ac_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0)); @@ -1545,7 +1545,7 @@ radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool if (!unlock) { uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET; - ac_emit_cp_atomic_mem(cs->b, TC_OP_ATOMIC_CMPSWAP_32, ATOMIC_COMMAND_LOOP, mutex_va, 1, 0); + ac_emit_cp_atomic_mem(cs->b, V_1E_1_GL2_OP_ATOMIC_CMPSWAP_32, V_1E_1_LOOP_UNTIL_COMPARE_SATISFIED, mutex_va, 1, 0); } uint64_t va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET; diff --git a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c index a20cedb1649..58d53eeb210 100644 --- a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c +++ b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c @@ -29,13 +29,13 @@ bool si_init_cp_reg_shadowing(struct si_context *sctx) } 
ac_pm4_cmd_add(shadowing_pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - ac_pm4_cmd_add(shadowing_pm4, CC0_UPDATE_LOAD_ENABLES(1) | - CC0_LOAD_PER_CONTEXT_STATE(1) | CC0_LOAD_CS_SH_REGS(1) | - CC0_LOAD_GFX_SH_REGS(1) | CC0_LOAD_GLOBAL_UCONFIG(1)); - ac_pm4_cmd_add(shadowing_pm4, CC1_UPDATE_SHADOW_ENABLES(1) | - CC1_SHADOW_PER_CONTEXT_STATE(1) | CC1_SHADOW_CS_SH_REGS(1) | - CC1_SHADOW_GFX_SH_REGS(1) | CC1_SHADOW_GLOBAL_UCONFIG(1) | - CC1_SHADOW_GLOBAL_CONFIG(1)); + ac_pm4_cmd_add(shadowing_pm4, S_28_1_UPDATE_LOAD_ENABLES(1) | + S_28_1_LOAD_PER_CONTEXT_STATE(1) | S_28_1_LOAD_CS_SH_REGS(1) | + S_28_1_LOAD_GFX_SH_REGS(1) | S_28_1_LOAD_GLOBAL_UCONFIG(1)); + ac_pm4_cmd_add(shadowing_pm4, S_28_2_UPDATE_SHADOW_ENABLES(1) | + S_28_2_SHADOW_PER_CONTEXT_STATE(1) | S_28_2_SHADOW_CS_SH_REGS(1) | + S_28_2_SHADOW_GFX_SH_REGS(1) | S_28_2_SHADOW_GLOBAL_UCONFIG(1) | + S_28_2_SHADOW_GLOBAL_CONFIG(1)); for (unsigned i = 0; i < SI_NUM_REG_RANGES; i++) ac_build_load_reg(&sctx->screen->info, shadowing_pm4, i, diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 444e0141677..112684b90c0 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -1083,7 +1083,7 @@ static void si_emit_query_predication(struct si_context *ctx, unsigned index) struct gfx11_sh_query *gfx10_query = (struct gfx11_sh_query *)query; struct gfx11_sh_query_buffer *qbuf, *first, *last; - op = PRED_OP(PREDICATION_OP_PRIMCOUNT); + op = S_20_1_PRED_OP(PREDICATION_OP_PRIMCOUNT); /* if true then invert, see GL_ARB_conditional_render_inverted */ if (!invert) @@ -1131,17 +1131,17 @@ static void si_emit_query_predication(struct si_context *ctx, unsigned index) struct si_query_buffer *qbuf; if (query->workaround_buf) { - op = PRED_OP(PREDICATION_OP_BOOL64); + op = S_20_1_PRED_OP(PREDICATION_OP_BOOL64); } else { switch (query->b.type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case 
PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: - op = PRED_OP(PREDICATION_OP_ZPASS); + op = S_20_1_PRED_OP(PREDICATION_OP_ZPASS); break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: - op = PRED_OP(PREDICATION_OP_PRIMCOUNT); + op = S_20_1_PRED_OP(PREDICATION_OP_PRIMCOUNT); invert = !invert; break; default: diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index a1b892776c6..30ca743d831 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -107,8 +107,8 @@ static void si_sqtt_start(struct si_context *sctx, struct radeon_cmdbuf *cs) switch (ip_type) { case AMD_IP_GFX: radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - radeon_emit(CC0_UPDATE_LOAD_ENABLES(1)); - radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1)); + radeon_emit(S_28_1_UPDATE_LOAD_ENABLES(1)); + radeon_emit(S_28_2_UPDATE_SHADOW_ENABLES(1)); break; case AMD_IP_COMPUTE: radeon_emit(PKT3(PKT3_NOP, 0, 0)); @@ -160,8 +160,8 @@ static void si_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs) switch (ip_type) { case AMD_IP_GFX: radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - radeon_emit(CC0_UPDATE_LOAD_ENABLES(1)); - radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1)); + radeon_emit(S_28_1_UPDATE_LOAD_ENABLES(1)); + radeon_emit(S_28_2_UPDATE_SHADOW_ENABLES(1)); break; case AMD_IP_COMPUTE: radeon_emit(PKT3(PKT3_NOP, 0, 0)); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index bb4c93c4b17..54f7c63bacc 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4866,8 +4866,8 @@ static bool gfx6_init_gfx_preamble_state(struct si_context *sctx) if (sctx->is_gfx_queue && !sctx->uses_kernelq_reg_shadowing) { ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1)); - ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, 
S_28_1_UPDATE_LOAD_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1)); if (sscreen->dpbb_allowed) { ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0)); @@ -4956,17 +4956,17 @@ static bool gfx10_init_gfx_preamble_state(struct si_context *sctx) */ if (sctx->gfx_level != GFX11_5) { ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1) | CC0_LOAD_PER_CONTEXT_STATE(1) | - CC0_LOAD_CS_SH_REGS(1) | CC0_LOAD_GFX_SH_REGS(1) | - CC0_LOAD_GLOBAL_UCONFIG(1)); - ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1) | CC1_SHADOW_PER_CONTEXT_STATE(1) | - CC1_SHADOW_CS_SH_REGS(1) | CC1_SHADOW_GFX_SH_REGS(1) | - CC1_SHADOW_GLOBAL_UCONFIG(1) | CC1_SHADOW_GLOBAL_CONFIG(1)); + ac_pm4_cmd_add(&pm4->base, S_28_1_UPDATE_LOAD_ENABLES(1) | S_28_1_LOAD_PER_CONTEXT_STATE(1) | + S_28_1_LOAD_CS_SH_REGS(1) | S_28_1_LOAD_GFX_SH_REGS(1) | + S_28_1_LOAD_GLOBAL_UCONFIG(1)); + ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1) | S_28_2_SHADOW_PER_CONTEXT_STATE(1) | + S_28_2_SHADOW_CS_SH_REGS(1) | S_28_2_SHADOW_GFX_SH_REGS(1) | + S_28_2_SHADOW_GLOBAL_UCONFIG(1) | S_28_2_SHADOW_GLOBAL_CONFIG(1)); } } else if (sctx->is_gfx_queue && !sctx->uses_kernelq_reg_shadowing) { ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1)); - ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, S_28_1_UPDATE_LOAD_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1)); if (sscreen->dpbb_allowed) { ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0)); @@ -5037,16 +5037,16 @@ static bool gfx12_init_gfx_preamble_state(struct si_context *sctx) if (sctx->uses_userq_reg_shadowing) { ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1) | CC0_LOAD_PER_CONTEXT_STATE(1) | - CC0_LOAD_CS_SH_REGS(1) | CC0_LOAD_GFX_SH_REGS(1) | - 
CC0_LOAD_GLOBAL_UCONFIG(1)); - ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1) | CC1_SHADOW_PER_CONTEXT_STATE(1) | - CC1_SHADOW_CS_SH_REGS(1) | CC1_SHADOW_GFX_SH_REGS(1) | - CC1_SHADOW_GLOBAL_UCONFIG(1) | CC1_SHADOW_GLOBAL_CONFIG(1)); + ac_pm4_cmd_add(&pm4->base, S_28_1_UPDATE_LOAD_ENABLES(1) | S_28_1_LOAD_PER_CONTEXT_STATE(1) | + S_28_1_LOAD_CS_SH_REGS(1) | S_28_1_LOAD_GFX_SH_REGS(1) | + S_28_1_LOAD_GLOBAL_UCONFIG(1)); + ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1) | S_28_2_SHADOW_PER_CONTEXT_STATE(1) | + S_28_2_SHADOW_CS_SH_REGS(1) | S_28_2_SHADOW_GFX_SH_REGS(1) | + S_28_2_SHADOW_GLOBAL_UCONFIG(1) | S_28_2_SHADOW_GLOBAL_CONFIG(1)); } else if (sctx->is_gfx_queue && !sctx->uses_kernelq_reg_shadowing) { ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1)); - ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, S_28_1_UPDATE_LOAD_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, S_28_2_UPDATE_SHADOW_ENABLES(1)); } if (sctx->is_gfx_queue && sscreen->dpbb_allowed && !sctx->uses_userq_reg_shadowing) { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 25c5d444692..9410e84b56e 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -1610,8 +1610,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw radeon_emit((sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit((sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit(((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) | - S_2C3_DRAW_INDEX_ENABLE(sctx->vs_uses_draw_id) | - S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count)); + S_2C_4_DRAW_INDEX_ENABLE(sctx->vs_uses_draw_id) | + S_2C_4_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count)); radeon_emit(indirect->draw_count); 
radeon_emit(count_va); radeon_emit(count_va >> 32); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp index 678f15bc2b0..7e5ee667c88 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.cpp @@ -1495,17 +1495,17 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws, amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0)); amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo)); amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo) >> 32); - amdgpu_pkt_add_dw(userq->f32_shadowing_ib_pm4_dw | S_3F3_INHERIT_VMID_MQD_GFX(1)); + amdgpu_pkt_add_dw(userq->f32_shadowing_ib_pm4_dw | S_3F_3_INHERIT_VMID_PFP(1)); } amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0)); amdgpu_pkt_add_dw(csc->chunk_ib[IB_MAIN].va_start); amdgpu_pkt_add_dw(csc->chunk_ib[IB_MAIN].va_start >> 32); if (userq->ip_type == AMD_IP_GFX) - amdgpu_pkt_add_dw((csc->chunk_ib[IB_MAIN].ib_bytes / 4) | S_3F3_INHERIT_VMID_MQD_GFX(1)); + amdgpu_pkt_add_dw((csc->chunk_ib[IB_MAIN].ib_bytes / 4) | S_3F_3_INHERIT_VMID_PFP(1)); else - amdgpu_pkt_add_dw((csc->chunk_ib[IB_MAIN].ib_bytes / 4) | S_3F3_VALID_COMPUTE(1) | - S_3F3_INHERIT_VMID_MQD_COMPUTE(1)); + amdgpu_pkt_add_dw((csc->chunk_ib[IB_MAIN].ib_bytes / 4) | S_3F_3_VALID(1) | + S_3F_3_INHERIT_VMID_MEC(1)); /* Add 8 for release mem packet and 2 for protected fence signal packet. 
* Calculcating userq_fence_seq_num this way to match with kernel fence that is @@ -1549,7 +1549,7 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws, for (unsigned i = 0; i < 1 + DIV_ROUND_UP(num_fences, 4); i++) *cond_exec_skip_counts[i].count_dw_ptr = (amdgpu_pkt_get_next_wptr() - cond_exec_skip_counts[i].start_wptr) | - COND_EXEC_USERQ_OVERRULE_CMD; + S_22_4_EXEC_USERQ_OVERRULE_CMD(1); } } else { mesa_loge("amdgpu: unsupported userq ip submission = %d\n", userq->ip_type); diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c index e92ad158cec..49c20ea0131 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c @@ -320,7 +320,7 @@ amdgpu_userq_submit_cs_preamble_ib_once(struct radeon_cmdbuf *rcs, struct ac_pm4 amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0)); amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->cs_preamble_ib_bo)); amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->cs_preamble_ib_bo) >> 32); - amdgpu_pkt_add_dw(pm4->ndw | S_3F3_INHERIT_VMID_MQD_GFX(1)); + amdgpu_pkt_add_dw(pm4->ndw | S_3F_3_INHERIT_VMID_PFP(1)); amdgpu_pkt_end(); simple_mtx_unlock(&userq->lock); @@ -367,7 +367,7 @@ amdgpu_userq_f32_init_reg_shadowing(struct radeon_cmdbuf *rcs, struct ac_pm4_sta amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0)); amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo)); amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo) >> 32); - amdgpu_pkt_add_dw(pm4->ndw | S_3F3_INHERIT_VMID_MQD_GFX(1)); + amdgpu_pkt_add_dw(pm4->ndw | S_3F_3_INHERIT_VMID_PFP(1)); amdgpu_pkt_end(); simple_mtx_unlock(&userq->lock);