mesa/src/amd/common/ac_pm4.c
Antonio Ospite ddf2aa3a4d build: avoid redefining unreachable() which is standard in C23
In the C23 standard unreachable() is now a predefined function-like
macro in <stddef.h>

See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in

And this causes build errors when building for C23:

-----------------------------------------------------------------------
In file included from ../src/util/log.h:30,
                 from ../src/util/log.c:30:
../src/util/macros.h:123:9: warning: "unreachable" redefined
  123 | #define unreachable(str)    \
      |         ^~~~~~~~~~~
In file included from ../src/util/macros.h:31:
/usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition
  456 | #define unreachable() (__builtin_unreachable ())
      |         ^~~~~~~~~~~
-----------------------------------------------------------------------

So don't redefine it with the same name, but use the name UNREACHABLE()
to also signify it's a macro.

Using a different name also makes sense because the behavior of the
macro was extending the one of __builtin_unreachable() anyway, and it
also had a different signature, accepting one argument, compared to the
standard unreachable() with no arguments.

This change improves the chances of building mesa with the C23 standard,
which for instance is the default in recent AOSP versions.

All the instances of the macro, including the definition, were updated
with the following command line:

  git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \
  while read file; \
  do \
    sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \
  done && \
  sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2025-07-31 17:49:42 +00:00

449 lines
14 KiB
C

/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
#include "ac_debug.h"
#include "ac_gpu_info.h"
#include "ac_pm4.h"
#include "sid.h"
#include <string.h>
#include <stdlib.h>
/* Return true if the opcode is one of the unpacked SET_*_REG_PAIRS packets. */
static bool
opcode_is_pairs(unsigned opcode)
{
   switch (opcode) {
   case PKT3_SET_CONTEXT_REG_PAIRS:
   case PKT3_SET_SH_REG_PAIRS:
   case PKT3_SET_UCONFIG_REG_PAIRS:
      return true;
   default:
      return false;
   }
}
/* Return true if the opcode is one of the SET_*_REG_PAIRS_PACKED* packets. */
static bool
opcode_is_pairs_packed(unsigned opcode)
{
   switch (opcode) {
   case PKT3_SET_CONTEXT_REG_PAIRS_PACKED:
   case PKT3_SET_SH_REG_PAIRS_PACKED:
   case PKT3_SET_SH_REG_PAIRS_PACKED_N:
      return true;
   default:
      return false;
   }
}
/* Return true if "reg" (an absolute register offset) is in the set of registers
 * that ac_pm4_set_reg() routes through ac_pm4_set_privileged_reg() for the
 * gfx level of the state.
 */
static bool
is_privileged_reg(const struct ac_pm4_state *state, unsigned reg)
{
   const struct radeon_info *info = state->info;

   if (info->gfx_level >= GFX10 && info->gfx_level <= GFX10_3) {
      switch (reg) {
      case R_008D04_SQ_THREAD_TRACE_BUF0_SIZE:
      case R_008D00_SQ_THREAD_TRACE_BUF0_BASE:
      case R_008D14_SQ_THREAD_TRACE_MASK:
      case R_008D18_SQ_THREAD_TRACE_TOKEN_MASK:
      case R_008D1C_SQ_THREAD_TRACE_CTRL:
         return true;
      default:
         return false;
      }
   }

   if (info->gfx_level >= GFX6 && info->gfx_level <= GFX8)
      return reg == R_009100_SPI_CONFIG_CNTL;

   return false;
}
static unsigned
pairs_packed_opcode_to_regular(unsigned opcode)
{
switch (opcode) {
case PKT3_SET_CONTEXT_REG_PAIRS_PACKED:
return PKT3_SET_CONTEXT_REG;
case PKT3_SET_SH_REG_PAIRS_PACKED:
return PKT3_SET_SH_REG;
default:
UNREACHABLE("invalid packed opcode");
}
}
/* Pick the SET_*_REG_PAIRS[_PACKED] replacement for a regular SET_*_REG opcode
 * according to the capability flags in state->info. The packed variant is
 * preferred over the unpacked one. Opcodes without a supported replacement are
 * returned unchanged.
 */
static unsigned
regular_opcode_to_pairs(struct ac_pm4_state *state, unsigned opcode)
{
   const struct radeon_info *info = state->info;

   if (opcode == PKT3_SET_CONTEXT_REG) {
      if (info->has_set_context_pairs_packed)
         return PKT3_SET_CONTEXT_REG_PAIRS_PACKED;
      if (info->has_set_context_pairs)
         return PKT3_SET_CONTEXT_REG_PAIRS;
      return opcode;
   }

   if (opcode == PKT3_SET_SH_REG) {
      if (info->has_set_sh_pairs_packed)
         return PKT3_SET_SH_REG_PAIRS_PACKED;
      if (info->has_set_sh_pairs)
         return PKT3_SET_SH_REG_PAIRS;
      return opcode;
   }

   if (opcode == PKT3_SET_UCONFIG_REG && info->has_set_uconfig_pairs)
      return PKT3_SET_UCONFIG_REG_PAIRS;

   return opcode;
}
/* In a packed packet, the body after the 2 leading dwords is a sequence of
 * 3-dword groups: one dword holding two register offsets, then two values.
 * Return true if the next dword to be written is the register-offset dword.
 */
static bool
packed_next_is_reg_offset_pair(struct ac_pm4_state *state)
{
   const bool at_offset_dword = (state->ndw - state->last_pm4) % 3 == 2;

   return at_offset_dword;
}
/* Return true if the next dword to be written in a packed packet is the second
 * value of a 3-dword group (i.e. the offset dword and first value are present).
 */
static bool
packed_next_is_reg_value1(struct ac_pm4_state *state)
{
   const bool at_second_value = (state->ndw - state->last_pm4) % 3 == 1;

   return at_second_value;
}
static bool
packed_prev_is_reg_value0(struct ac_pm4_state *state)
{
return packed_next_is_reg_value1(state);
}
/* Return the 16-bit register dword offset of the index-th register of the
 * current packed packet. Even indices live in the low half of the group's
 * offset dword, odd indices in the high half.
 */
static unsigned
get_packed_reg_dw_offsetN(struct ac_pm4_state *state, unsigned index)
{
   const unsigned group = index / 2;
   const unsigned shift = (index % 2) * 16;
   unsigned offset_dw = state->last_pm4 + 2 + group * 3;

   assert(offset_dw < state->ndw);
   return (state->pm4[offset_dw] >> shift) & 0xffff;
}
/* Return the index into state->pm4 of the value dword belonging to the
 * index-th register of the current packed packet.
 */
static unsigned
get_packed_reg_valueN_idx(struct ac_pm4_state *state, unsigned index)
{
   const unsigned group = index / 2;
   const unsigned slot = index % 2;
   /* Skip the 2 leading dwords, whole groups, and the group's offset dword. */
   unsigned value_dw = state->last_pm4 + 2 + group * 3 + 1 + slot;

   assert(value_dw < state->ndw);
   return value_dw;
}
/* Return the value stored for the index-th register of the current packed packet. */
static unsigned
get_packed_reg_valueN(struct ac_pm4_state *state, unsigned index)
{
   const unsigned value_dw = get_packed_reg_valueN_idx(state, index);

   return state->pm4[value_dw];
}
/* Return the number of register slots in the current packed packet, derived
 * from its size: every complete 3-dword group carries 2 registers. The packet
 * must consist of whole groups only.
 */
static unsigned
get_packed_reg_count(struct ac_pm4_state *state)
{
   int body_dw = state->ndw - state->last_pm4 - 2;

   assert(body_dw > 0 && body_dw % 3 == 0);

   const int groups = body_dw / 3;
   return groups * 2;
}
/**
 * Finish the packet that is currently being built.
 *
 * For a packed SET_*_REG_PAIRS_PACKED* packet this either:
 *  - rewrites it in place as a regular SET_*_REG packet when all of its
 *    registers are consecutive, or
 *  - switches a SET_SH_REG_PAIRS_PACKED header to the *_N variant when the
 *    packet sets at most 14 registers.
 *
 * When state->debug_sqtt is set, state->spi_shader_pgm_lo_reg is updated with
 * the register offset of an SPI_SHADER_PGM_LO_* write found in the last SH
 * packet (packed, regular or pairs).
 */
void
ac_pm4_finalize(struct ac_pm4_state *state)
{
   if (opcode_is_pairs_packed(state->last_opcode)) {
      unsigned reg_count = get_packed_reg_count(state);
      unsigned reg_dw_offset0 = get_packed_reg_dw_offsetN(state, 0);

      /* Don't count the duplicated first register that pads the packet. */
      if (state->packed_is_padded)
         reg_count--;

      bool all_consecutive = true;

      /* If the whole packed SET packet only sets consecutive registers, rewrite the packet
       * to be unpacked to make it shorter.
       *
       * This also eliminates the invalid scenario when the packed SET packet sets only
       * 2 registers and the register offsets are equal due to padding.
       */
      for (unsigned i = 1; i < reg_count; i++) {
         if (reg_dw_offset0 != get_packed_reg_dw_offsetN(state, i) - i) {
            all_consecutive = false;
            break;
         }
      }

      if (all_consecutive) {
         const unsigned regular_opcode = pairs_packed_opcode_to_regular(state->last_opcode);

         assert(state->ndw - state->last_pm4 == 2 + 3 * (reg_count + state->packed_is_padded) / 2);
         state->pm4[state->last_pm4] = PKT3(regular_opcode, reg_count, 0);
         state->pm4[state->last_pm4 + 1] = reg_dw_offset0;
         /* Compact the values in place; each source dword is ahead of its destination. */
         for (unsigned i = 0; i < reg_count; i++)
            state->pm4[state->last_pm4 + 2 + i] = get_packed_reg_valueN(state, i);
         state->ndw = state->last_pm4 + 2 + reg_count;

         /* Make the tracking state describe the rewritten packet: record the opcode that
          * was actually emitted (a context packet must not be reported as SET_SH_REG,
          * otherwise the SQTT scan below would interpret context offsets as SH registers),
          * and the last register of the range rather than the padding register.
          */
         state->last_opcode = regular_opcode;
         state->last_reg = reg_dw_offset0 + reg_count - 1;
      } else {
         /* Record in spi_shader_pgm_lo_reg which register holds the shader address. */
         if (state->debug_sqtt &&
             (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED ||
              state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N)) {
            if (state->packed_is_padded)
               reg_count++; /* Add this back because we only need to record the last write. */

            for (int i = reg_count - 1; i >= 0; i--) {
               unsigned reg_offset = SI_SH_REG_OFFSET + get_packed_reg_dw_offsetN(state, i) * 4;

               if (strstr(ac_get_register_name(state->info->gfx_level,
                                               state->info->family, reg_offset),
                          "SPI_SHADER_PGM_LO_")) {
                  state->spi_shader_pgm_lo_reg = reg_offset;
                  break;
               }
            }
         }

         /* If it's a packed SET_SH packet, use the *_N variant when possible. */
         if (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED && reg_count <= 14) {
            state->pm4[state->last_pm4] &= PKT3_IT_OPCODE_C;
            state->pm4[state->last_pm4] |= PKT3_IT_OPCODE_S(PKT3_SET_SH_REG_PAIRS_PACKED_N);
         }
      }
   }

   if (state->debug_sqtt && state->last_opcode == PKT3_SET_SH_REG) {
      /* Record in spi_shader_pgm_lo_reg which register holds the shader address.
       * A regular packet stores one base offset followed by the values.
       */
      unsigned reg_count = PKT_COUNT_G(state->pm4[state->last_pm4]);
      unsigned reg_base_offset = SI_SH_REG_OFFSET + state->pm4[state->last_pm4 + 1] * 4;

      for (unsigned i = 0; i < reg_count; i++) {
         if (strstr(ac_get_register_name(state->info->gfx_level,
                                         state->info->family, reg_base_offset + i * 4),
                    "SPI_SHADER_PGM_LO_")) {
            state->spi_shader_pgm_lo_reg = reg_base_offset + i * 4;
            break;
         }
      }
   }

   if (state->debug_sqtt && state->last_opcode == PKT3_SET_SH_REG_PAIRS) {
      /* Record in spi_shader_pgm_lo_reg which register holds the shader address.
       * A pairs packet stores (offset, value) dword pairs after the header.
       */
      unsigned reg_count = (PKT_COUNT_G(state->pm4[state->last_pm4]) + 1) / 2;

      for (unsigned i = 0; i < reg_count; i++) {
         unsigned reg_base_offset = SI_SH_REG_OFFSET + state->pm4[state->last_pm4 + 1 + 2 * i] * 4;

         if (strstr(ac_get_register_name(state->info->gfx_level,
                                         state->info->family, reg_base_offset),
                    "SPI_SHADER_PGM_LO_")) {
            state->spi_shader_pgm_lo_reg = reg_base_offset;
            break;
         }
      }
   }
}
/**
 * Start a new packet with the given opcode.
 *
 * The previous packet is finalized first. One dword is reserved for the
 * packet header; its position is remembered in state->last_pm4 and it is
 * filled in by ac_pm4_cmd_end().
 */
void
ac_pm4_cmd_begin(struct ac_pm4_state *state, unsigned opcode)
{
   ac_pm4_finalize(state);

   assert(state->max_dw);
   assert(state->ndw < state->max_dw);
   assert(opcode <= 254);

   /* Reserve the header dword. */
   state->last_pm4 = state->ndw;
   state->ndw++;

   state->last_opcode = opcode;
   state->packed_is_padded = false;
}
/**
 * Append one raw dword to the command buffer.
 *
 * The last opcode is invalidated so that a following register write cannot be
 * merged into whatever packet this dword belongs to.
 */
void
ac_pm4_cmd_add(struct ac_pm4_state *state, uint32_t dw)
{
   assert(state->max_dw);
   assert(state->ndw < state->max_dw);

   state->pm4[state->ndw] = dw;
   state->ndw++;

   state->last_opcode = 255; /* invalid opcode */
}
/* Decide whether the header of the current packet must have RESET_FILTER_CAM
 * set. This is never the case on a compute queue.
 */
static bool
need_reset_filter_cam(const struct ac_pm4_state *state)
{
   const struct radeon_info *info = state->info;

   if (state->is_compute_queue)
      return false;

   /* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. */
   if (opcode_is_pairs(state->last_opcode) ||
       opcode_is_pairs_packed(state->last_opcode))
      return true;

   if (info->gfx_level < GFX11)
      return false;

   /* On GFX11+, also set it when the last written register is one of these
    * thread trace registers (last_reg is a dword offset into the uconfig range).
    */
   const uint32_t last_reg = state->last_reg << 2;

   switch (last_reg + CIK_UCONFIG_REG_OFFSET) {
   case R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE:
   case R_0367A0_SQ_THREAD_TRACE_BUF0_BASE:
   case R_0367B4_SQ_THREAD_TRACE_MASK:
   case R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK:
   case R_0367B0_SQ_THREAD_TRACE_CTRL:
      return true;
   default:
      return false;
   }
}
/**
 * Write the header of the current packet.
 *
 * The count field is the packet size in dwords minus 2. For packed packets,
 * the packet is additionally padded to an even number of registers and the
 * register count is stored in the dword following the header.
 */
void
ac_pm4_cmd_end(struct ac_pm4_state *state, bool predicate)
{
   const unsigned count = state->ndw - state->last_pm4 - 2;
   /* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. */
   const bool reset_filter_cam = need_reset_filter_cam(state);

   uint32_t header = PKT3(state->last_opcode, count, predicate);
   header |= PKT3_RESET_FILTER_CAM_S(reset_filter_cam);
   state->pm4[state->last_pm4] = header;

   if (!opcode_is_pairs_packed(state->last_opcode))
      return;

   if (packed_prev_is_reg_value0(state)) {
      /* Duplicate the first register at the end to make the number of registers aligned to 2.
       * This re-enters ac_pm4_cmd_end through ac_pm4_set_reg_custom, which rewrites the header
       * for the padded size.
       */
      const unsigned first_reg = get_packed_reg_dw_offsetN(state, 0) * 4;
      const uint32_t first_val = get_packed_reg_valueN(state, 0);

      ac_pm4_set_reg_custom(state, first_reg, first_val, state->last_opcode, 0);
      state->packed_is_padded = true;
   }

   state->pm4[state->last_pm4 + 1] = get_packed_reg_count(state);
}
/**
 * Append one register write using an explicitly chosen packet opcode, either
 * by starting a new packet or by extending the packet currently being built.
 *
 * \param state   PM4 state to append to.
 * \param reg     Register byte offset; it is converted to a dword offset here.
 *                Callers in this file pass it with the base of the register
 *                range already subtracted.
 * \param val     Value to write.
 * \param opcode  PKT3 opcode to emit the write with.
 * \param idx     Index stored in bits 28 and up of the register-offset dword of
 *                a regular packet; must be 0 for pairs and packed opcodes.
 *
 * The packet header is (re)written after every call via ac_pm4_cmd_end().
 */
void
ac_pm4_set_reg_custom(struct ac_pm4_state *state, unsigned reg, uint32_t val,
unsigned opcode, unsigned idx)
{
bool is_packed = opcode_is_pairs_packed(opcode);
/* Byte offset -> dword offset. */
reg >>= 2;
assert(state->max_dw);
assert(state->ndw + 2 <= state->max_dw);
if (is_packed) {
assert(idx == 0);
/* A packed packet is extended as long as the opcode stays the same. */
if (opcode != state->last_opcode) {
ac_pm4_cmd_begin(state, opcode); /* reserve space for the header */
state->ndw++; /* reserve space for the register count, it will be set at the end */
}
} else if (opcode_is_pairs(opcode)) {
assert(idx == 0);
if (opcode != state->last_opcode)
ac_pm4_cmd_begin(state, opcode);
/* Pairs packets store the register offset in front of every value. */
state->pm4[state->ndw++] = reg;
} else if (opcode != state->last_opcode || reg != (state->last_reg + 1) ||
idx != state->last_idx) {
/* Regular packet: only a write to the next consecutive register with the same
 * opcode and index can extend the current packet; otherwise start a new one
 * with its own starting-offset dword.
 */
ac_pm4_cmd_begin(state, opcode);
state->pm4[state->ndw++] = reg | (idx << 28);
}
assert(reg <= UINT16_MAX);
state->last_reg = reg;
state->last_idx = idx;
if (is_packed) {
if (state->packed_is_padded) {
/* The packet is padded, which means the first register is written redundantly again
* at the end. Remove it, so that we can replace it with this register.
*/
state->packed_is_padded = false;
state->ndw--;
}
if (packed_next_is_reg_offset_pair(state)) {
/* Start a new 3-dword group; the offset goes into the low 16 bits. */
state->pm4[state->ndw++] = reg;
} else if (packed_next_is_reg_value1(state)) {
/* Set the second register offset in the high 16 bits. */
state->pm4[state->ndw - 2] &= 0x0000ffff;
state->pm4[state->ndw - 2] |= reg << 16;
}
}
state->pm4[state->ndw++] = val;
/* Refresh the header (and, for packed packets, the padding and register count). */
ac_pm4_cmd_end(state, false);
}
/* Write a config register by emitting a COPY_DATA packet with an immediate
 * source instead of a SET_CONFIG_REG packet. "reg" is the absolute register
 * offset and must lie in the config register range.
 */
static void
ac_pm4_set_privileged_reg(struct ac_pm4_state *state, unsigned reg, uint32_t val)
{
   assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);

   const uint32_t packet[] = {
      PKT3(PKT3_COPY_DATA, 4, 0),
      COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF),
      val,
      0, /* unused */
      reg >> 2,
      0, /* unused */
   };

   for (unsigned i = 0; i < sizeof(packet) / sizeof(packet[0]); i++)
      ac_pm4_cmd_add(state, packet[i]);
}
/**
 * Append a write of "val" to the register at absolute offset "reg".
 *
 * The register range (config, SH, context or uconfig) selects the packet
 * opcode; the pairs/packed variant of that opcode is used when state->info
 * reports support for it. Privileged registers are written with
 * ac_pm4_set_privileged_reg() instead. An offset outside all known ranges is
 * reported on stderr and ignored.
 */
void ac_pm4_set_reg(struct ac_pm4_state *state, unsigned reg, uint32_t val)
{
   unsigned range_base;
   unsigned opcode;

   if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) {
      range_base = SI_CONFIG_REG_OFFSET;
      opcode = PKT3_SET_CONFIG_REG;
   } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) {
      range_base = SI_SH_REG_OFFSET;
      opcode = PKT3_SET_SH_REG;
   } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) {
      range_base = SI_CONTEXT_REG_OFFSET;
      opcode = PKT3_SET_CONTEXT_REG;
   } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) {
      range_base = CIK_UCONFIG_REG_OFFSET;
      opcode = PKT3_SET_UCONFIG_REG;
   } else {
      fprintf(stderr, "mesa: Invalid register offset %08x!\n", reg);
      return;
   }

   /* The privileged path takes the absolute offset, not the range-relative one. */
   if (is_privileged_reg(state, reg)) {
      ac_pm4_set_privileged_reg(state, reg, val);
      return;
   }

   ac_pm4_set_reg_custom(state, reg - range_base, val,
                         regular_opcode_to_pairs(state, opcode), 0);
}
/**
 * Append a write to an SH register, using SET_SH_REG_INDEX with index 3 when
 * state->info->uses_kernel_cu_mask is set and the regular register path
 * otherwise. "reg" is the absolute register offset.
 */
void
ac_pm4_set_reg_idx3(struct ac_pm4_state *state, unsigned reg, uint32_t val)
{
   if (!state->info->uses_kernel_cu_mask) {
      ac_pm4_set_reg(state, reg, val);
      return;
   }

   assert(state->info->gfx_level >= GFX10);
   ac_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX, 3);
}
/**
 * Reset a PM4 state so that it is empty and ready to be filled again.
 *
 * \param state             State to reset.
 * \param info              Device info stored in the state.
 * \param debug_sqtt        Stored in state->debug_sqtt.
 * \param is_compute_queue  Stored in state->is_compute_queue.
 *
 * If state->max_dw is still 0, it is set to the size of the embedded pm4 array.
 */
void
ac_pm4_clear_state(struct ac_pm4_state *state, const struct radeon_info *info,
                   bool debug_sqtt, bool is_compute_queue)
{
   state->info = info;
   state->debug_sqtt = debug_sqtt;
   state->ndw = 0;
   state->is_compute_queue = is_compute_queue;

   /* Forget the packet that was being built before the reset. Otherwise the next register
    * write could be merged into (or the next ac_pm4_finalize could inspect) a packet that
    * no longer exists, using a stale last_pm4 that points past ndw.
    */
   state->last_opcode = 255; /* invalid opcode */
   state->last_pm4 = 0;
   state->packed_is_padded = false;

   if (!state->max_dw)
      state->max_dw = ARRAY_SIZE(state->pm4);
}
/**
 * Allocate a zero-initialized PM4 state with room for at least max_dw dwords.
 *
 * The capacity is never smaller than the pm4 array embedded in the struct;
 * extra dwords are allocated past the end of the struct.
 *
 * \return the new state, or NULL if the allocation failed. Release it with
 *         ac_pm4_free_state().
 */
struct ac_pm4_state *
ac_pm4_create_sized(const struct radeon_info *info, bool debug_sqtt,
                    unsigned max_dw, bool is_compute_queue)
{
   struct ac_pm4_state *pm4;
   unsigned size;

   if (max_dw < ARRAY_SIZE(pm4->pm4))
      max_dw = ARRAY_SIZE(pm4->pm4);

   /* Struct size plus 4 bytes for every dword beyond the embedded array. */
   size = sizeof(*pm4) + 4 * (max_dw - ARRAY_SIZE(pm4->pm4));

   pm4 = calloc(1, size);
   if (!pm4)
      return NULL;

   pm4->max_dw = max_dw;
   ac_pm4_clear_state(pm4, info, debug_sqtt, is_compute_queue);
   return pm4;
}
/**
 * Free a PM4 state. Passing NULL is allowed and does nothing.
 */
void
ac_pm4_free_state(struct ac_pm4_state *state)
{
   /* free() accepts a null pointer, so no explicit check is needed. */
   free(state);
}