pan/kmod: Expose the raw GPU ID through pan_kmod_dev_props

Rather than splitting the GPU ID in two, let GPU ID users do the split
themselves when they need to.

We also rework the model detection to use a mask, so the mask can be
customized when the version major/minor fields are needed to
differentiate two GPUs that share the same arch major/minor and
product major.

Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35155>
Boris Brezillon 2025-04-28 14:39:19 +02:00 committed by Marge Bot
parent e18da57276
commit d14f2df85a
27 changed files with 190 additions and 125 deletions
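
For reference, a minimal sketch of the raw GPU ID layout this commit settles on, as implied by the BITFIELD_RANGE() definitions added to pan_props.h below, together with the prod-ID/revision split the new helpers open-code. The helper names here are illustrative, not part of the change:

#include <stdint.h>

/* Raw 32-bit GPU ID layout:
 *   [31:28] arch major    [27:24] arch minor    [23:20] arch rev
 *   [19:16] product major
 *   [15:12] version major [11:4] version minor  [3:0] version status
 */
static inline uint32_t gpu_prod_id(uint32_t gpu_id)
{
   return gpu_id >> 16;    /* arch + product fields */
}

static inline uint32_t gpu_rev(uint32_t gpu_id)
{
   return gpu_id & 0xffff; /* version (rXpY, status) fields */
}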

View file

@@ -116,7 +116,7 @@ GENX(pan_blend_get_shader_locked)(struct pan_blend_shader_cache *cache,
#else
NIR_PASS(_, nir, pan_lower_framebuffer, rt_formats,
pan_raw_format_mask_midgard(rt_formats), MAX2(key.nr_samples, 1),
cache->gpu_id < 0x700);
(cache->gpu_id >> 16) < 0x700);
#endif
struct util_dynarray binary;

View file

@@ -62,9 +62,9 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
pan_kmod_dev_query_props(dev->kmod.dev, &dev->kmod.props);
dev->arch = pan_arch(dev->kmod.props.gpu_prod_id);
dev->model =
pan_get_model(dev->kmod.props.gpu_prod_id, dev->kmod.props.gpu_variant);
dev->arch = pan_arch(dev->kmod.props.gpu_id);
dev->model = pan_get_model(dev->kmod.props.gpu_id,
dev->kmod.props.gpu_variant);
/* If we don't recognize the model, bail early */
if (!dev->model)

View file

@@ -190,13 +190,19 @@ panfrost_device_fd(const struct panfrost_device *dev)
static inline uint32_t
panfrost_device_gpu_id(const struct panfrost_device *dev)
{
return dev->kmod.props.gpu_prod_id;
return dev->kmod.props.gpu_id;
}
static inline uint32_t
panfrost_device_gpu_prod_id(const struct panfrost_device *dev)
{
return dev->kmod.props.gpu_id >> 16;
}
static inline uint32_t
panfrost_device_gpu_rev(const struct panfrost_device *dev)
{
return dev->kmod.props.gpu_revision;
return dev->kmod.props.gpu_id & BITFIELD_MASK(16);
}
static inline int

View file

@@ -1274,7 +1274,7 @@ pan_preload_emit_pre_frame_dcd(struct pan_fb_preload_cache *cache,
* The PAN_ARCH check is redundant but allows the compiler to optimize
* when PAN_ARCH < 7.
*/
if (PAN_ARCH >= 7 && cache->gpu_id >= 0x7200)
if (PAN_ARCH >= 7 && (cache->gpu_id >> 16) >= 0x7200)
fb->bifrost.pre_post.modes[dcd_idx] =
MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS;
else

View file

@@ -154,7 +154,7 @@ get_max_msaa(struct panfrost_device *dev, enum pipe_format format)
/* t760 (GPU ID 0x750 - not a typo) has a HW issue in versions before
* the r1p0 version, which prevents 16x MSAA from working properly.
*/
if (panfrost_device_gpu_id(dev) == 0x750 &&
if (panfrost_device_gpu_prod_id(dev) == 0x750 &&
panfrost_device_gpu_rev(dev) < 0x1000)
max_msaa = MIN2(max_msaa, 8);
@@ -990,7 +990,7 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
/* Bail early on unsupported hardware */
if (dev->model == NULL) {
debug_printf("panfrost: Unsupported model %X",
panfrost_device_gpu_id(dev));
panfrost_device_gpu_prod_id(dev));
panfrost_destroy_screen(&(screen->base));
return NULL;
}

View file

@@ -197,7 +197,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
if (dev->arch <= 5 && s->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, s, pan_lower_framebuffer, key->fs.rt_formats,
pan_raw_format_mask_midgard(key->fs.rt_formats), 0,
panfrost_device_gpu_id(dev) < 0x700);
panfrost_device_gpu_prod_id(dev) < 0x700);
}
if (s->info.stage == MESA_SHADER_VERTEX)

View file

@@ -265,7 +265,7 @@ main(int argc, const char **argv)
const char *output_h_path = argv[4];
const char *output_c_path = argv[5];
int target_arch = atoi(target_arch_str);
unsigned target_arch = atoi(target_arch_str);
if (target_arch < 4 || target_arch > 13) {
fprintf(stderr, "Unsupported target arch %d\n", target_arch);
@@ -337,8 +337,10 @@
libfunc, v, get_compiler_options(target_arch), &opt,
load_kernel_input);
unsigned gpu_prod_id = (target_arch & 0xf) << 12;
struct pan_compile_inputs inputs = {
.gpu_id = target_arch << 12,
.gpu_id = gpu_prod_id << 16,
};
nir_link_shader_functions(s, nir);

View file

@@ -42,9 +42,9 @@
#define BIFROST_LIMITED_CLPER (1 << 1)
static inline unsigned
bifrost_get_quirks(unsigned product_id)
bifrost_get_quirks(unsigned gpu_id)
{
switch (product_id >> 8) {
switch (gpu_id >> 24) {
case 0x60: /* G71 */
return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER;
case 0x62: /* G72 */

View file

@@ -5441,7 +5441,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
}
/* This opt currently helps on Bifrost but not Valhall */
if (gpu_id < 0x9000)
if (pan_arch(gpu_id) < 9)
NIR_PASS(progress, nir, bifrost_nir_opt_boolean_bitwise);
NIR_PASS(progress, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL);
@@ -5849,8 +5849,8 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
* scratch access.
*/
glsl_type_size_align_func vars_to_scratch_size_align_func =
(gpu_id >= 0x9000) ? glsl_get_vec4_size_align_bytes
: glsl_get_natural_size_align_bytes;
(pan_arch(gpu_id) >= 9) ? glsl_get_vec4_size_align_bytes
: glsl_get_natural_size_align_bytes;
/* Lower large arrays to scratch and small arrays to bcsel */
NIR_PASS(_, nir, nir_lower_scratch_to_var);
NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
@@ -5884,7 +5884,7 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
NIR_PASS(_, nir, bifrost_nir_lower_load_output);
} else if (nir->info.stage == MESA_SHADER_VERTEX) {
if (gpu_id >= 0x9000) {
if (pan_arch(gpu_id) >= 9) {
NIR_PASS(_, nir, nir_lower_mediump_io, nir_var_shader_out,
VARYING_BIT_PSIZ, false);
}
@@ -6377,7 +6377,7 @@ bi_should_idvs(nir_shader *nir, const struct pan_compile_inputs *inputs)
return false;
/* Bifrost cannot write gl_PointSize during IDVS */
if ((inputs->gpu_id < 0x9000) &&
if ((pan_arch(inputs->gpu_id) < 9) &&
nir->info.outputs_written & VARYING_BIT_PSIZ)
return false;
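
The compiler-side rewrites above are behavior-preserving: comparing the old 16-bit product ID against 0x9000 is equivalent to comparing pan_arch() of the raw 32-bit ID against 9. A quick check with illustrative IDs:

assert(pan_arch(0x90910000) >= 9); /* G57: was 0x9091 >= 0x9000 */
assert(pan_arch(0x72120000) < 9);  /* G52: was 0x7212 <  0x9000 */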

View file

@@ -29,8 +29,9 @@
#include "bifrost/disassemble.h"
#include "util/macros.h"
#include "valhall/disassemble.h"
#include "panfrost/lib/pan_props.h"
unsigned gpu_id = 0x7212;
unsigned gpu_id = 0x72120000;
int verbose = 0;
#define BI_FOURCC(ch0, ch1, ch2, ch3) \
@@ -70,7 +71,7 @@ disassemble(const char *filename)
}
}
if ((gpu_id >> 12) >= 9)
if (pan_arch(gpu_id) >= 9)
disassemble_valhall(stdout, entrypoint, filesize, verbose);
else
disassemble_bifrost(stdout, entrypoint, filesize, verbose);
@@ -115,6 +116,7 @@ main(int argc, char **argv)
return 1;
}
gpu_id <<= 16;
break;
case 'g':
gpu_id = 0;

View file

@@ -149,11 +149,8 @@ struct pan_kmod_bo {
/* List of GPU properties needed by the UMD. */
struct pan_kmod_dev_props {
/* GPU product ID. */
uint32_t gpu_prod_id;
/* GPU revision. */
uint32_t gpu_revision;
/* GPU ID. */
uint32_t gpu_id;
/* GPU variant. */
uint32_t gpu_variant;

View file

@@ -105,7 +105,7 @@ panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev,
props->max_threads_per_core =
panfrost_query_raw(fd, DRM_PANFROST_PARAM_MAX_THREADS, true, 0);
if (!props->max_threads_per_core) {
switch (pan_arch(props->gpu_prod_id)) {
switch (pan_arch(props->gpu_id)) {
case 4:
case 5:
props->max_threads_per_core = 256;
@@ -141,7 +141,7 @@ panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev,
props->max_tasks_per_core = MAX2(thread_features >> 24, 1);
props->num_registers_per_core = thread_features & 0xffff;
if (!props->num_registers_per_core) {
switch (pan_arch(props->gpu_prod_id)) {
switch (pan_arch(props->gpu_id)) {
case 4:
case 5:
/* Assume we can always schedule max_threads_per_core when using 4
@@ -183,9 +183,8 @@ panfrost_dev_query_props(const struct pan_kmod_dev *dev,
int fd = dev->fd;
memset(props, 0, sizeof(*props));
props->gpu_prod_id =
panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0);
props->gpu_revision =
props->gpu_id =
(panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0) << 16) |
panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_REVISION, true, 0);
props->shader_present =
panfrost_query_raw(fd, DRM_PANFROST_PARAM_SHADER_PRESENT, true, 0);
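
On the legacy panfrost kernel driver the raw ID is thus assembled from the two existing queries. A worked example with illustrative values for a T760 r1p0:

uint32_t prod_id  = 0x0750; /* DRM_PANFROST_PARAM_GPU_PROD_ID */
uint32_t revision = 0x1000; /* DRM_PANFROST_PARAM_GPU_REVISION (r1p0) */
uint32_t gpu_id   = (prod_id << 16) | revision; /* 0x07501000 */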

View file

@@ -266,8 +266,7 @@ panthor_dev_query_props(const struct pan_kmod_dev *dev,
container_of(dev, struct panthor_kmod_dev, base);
*props = (struct pan_kmod_dev_props){
.gpu_prod_id = panthor_dev->props.gpu.gpu_id >> 16,
.gpu_revision = panthor_dev->props.gpu.gpu_id & 0xffff,
.gpu_id = panthor_dev->props.gpu.gpu_id,
.gpu_variant = panthor_dev->props.gpu.core_features & 0xff,
.shader_present = panthor_dev->props.gpu.shader_present,
.tiler_features = panthor_dev->props.gpu.tiler_features,

View file

@@ -32,61 +32,105 @@
#include <genxml/gen_macros.h>
/* Fixed "minimum revisions" */
#define NO_ANISO (~0)
#define HAS_ANISO (0)
/* GPU revision (rXpY) */
#define GPU_REV(X, Y) (((X) & 0xf) << 12 | ((Y) & 0xff) << 4)
#define MODEL(gpu_id_, gpu_variant_, shortname, counters_, \
min_rev_anisotropic_, tib_size_, tib_z_size_, ...) \
/* Fixed "minimum revisions" */
#define GPU_REV_NONE (~0)
#define GPU_REV_ALL GPU_REV(0, 0)
#define GPU_REV_R0P3 GPU_REV(0, 3)
#define GPU_REV_R1P1 GPU_REV(1, 1)
#define MODEL(gpu_prod_id_, gpu_prod_id_mask_, gpu_variant_, shortname, \
counters, ...) \
{ \
.gpu_id = gpu_id_, .gpu_variant = gpu_variant_, \
.gpu_prod_id = gpu_prod_id_, \
.gpu_prod_id_mask = gpu_prod_id_mask_, \
.gpu_variant = gpu_variant_, \
.name = "Mali-" shortname, \
.performance_counters = counters_, \
.min_rev_anisotropic = min_rev_anisotropic_, \
.tilebuffer_size = tib_size_, \
.tilebuffer_z_size = tib_z_size_, \
.quirks = { __VA_ARGS__ }, \
.performance_counters = counters, \
##__VA_ARGS__, \
}
#define MIDGARD_MODEL(gpu_prod_id, shortname, counters, ...) \
MODEL(gpu_prod_id << 16, 0xffff0000, 0, shortname, counters, ##__VA_ARGS__)
#define BIFROST_MODEL(gpu_prod_id, shortname, counters, ...) \
MODEL(gpu_prod_id << 16, ARCH_MAJOR | ARCH_MINOR | PRODUCT_MAJOR, 0, \
shortname, counters, ##__VA_ARGS__)
#define VALHALL_MODEL(gpu_prod_id, gpu_variant, shortname, counters, ...) \
MODEL(gpu_prod_id << 16, ARCH_MAJOR | ARCH_MINOR | PRODUCT_MAJOR, \
gpu_variant, shortname, counters, ##__VA_ARGS__)
#define AVALON_MODEL(gpu_prod_id, gpu_variant, shortname, counters, ...) \
MODEL(gpu_prod_id << 16, ARCH_MAJOR | ARCH_MINOR | PRODUCT_MAJOR, \
gpu_variant, shortname, counters, ##__VA_ARGS__)
#define MODEL_ANISO(rev) .min_rev_anisotropic = GPU_REV_##rev
#define MODEL_TB_SIZES(color_tb_size, z_tb_size) \
.tilebuffer = { \
.color_size = color_tb_size, \
.z_size = z_tb_size, \
}
#define MODEL_QUIRKS(...) .quirks = {__VA_ARGS__}
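
As a sanity check on the GPU_REV() encoding, the named revisions expand to the very literals the old table used. A worked expansion (assuming Mesa's STATIC_ASSERT; not part of the diff):

/* GPU_REV(X, Y) = ((X & 0xf) << 12) | ((Y & 0xff) << 4) */
STATIC_ASSERT(GPU_REV(0, 3) == 0x0030); /* r0p3: the old G72 literal */
STATIC_ASSERT(GPU_REV(1, 1) == 0x1010); /* r1p1: the old G51 literal */
STATIC_ASSERT(GPU_REV(1, 0) == 0x1000); /* r1p0: the T760 MSAA cutoff */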
/* Table of supported Mali GPUs */
/* clang-format off */
const struct pan_model pan_model_list[] = {
MODEL(0x600, 0, "T600", "T60x", NO_ANISO, 8192, 8192, .max_4x_msaa = true),
MODEL(0x620, 0, "T620", "T62x", NO_ANISO, 8192, 8192),
MODEL(0x720, 0, "T720", "T72x", NO_ANISO, 8192, 8192, .no_hierarchical_tiling = true, .max_4x_msaa = true),
MODEL(0x750, 0, "T760", "T76x", NO_ANISO, 8192, 8192),
MODEL(0x820, 0, "T820", "T82x", NO_ANISO, 8192, 8192, .no_hierarchical_tiling = true, .max_4x_msaa = true),
MODEL(0x830, 0, "T830", "T83x", NO_ANISO, 8192, 8192, .no_hierarchical_tiling = true, .max_4x_msaa = true),
MODEL(0x860, 0, "T860", "T86x", NO_ANISO, 8192, 8192),
MODEL(0x880, 0, "T880", "T88x", NO_ANISO, 8192, 8192),
MIDGARD_MODEL(0x600, "T600", "T60x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192),
MODEL_QUIRKS( .max_4x_msaa = true )),
MIDGARD_MODEL(0x620, "T620", "T62x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)),
MIDGARD_MODEL(0x720, "T720", "T72x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192),
MODEL_QUIRKS( .no_hierarchical_tiling = true, .max_4x_msaa = true )),
MIDGARD_MODEL(0x750, "T760", "T76x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)),
MIDGARD_MODEL(0x820, "T820", "T82x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192),
MODEL_QUIRKS( .no_hierarchical_tiling = true, .max_4x_msaa = true )),
MIDGARD_MODEL(0x830, "T830", "T83x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192),
MODEL_QUIRKS( .no_hierarchical_tiling = true, .max_4x_msaa = true )),
MIDGARD_MODEL(0x860, "T860", "T86x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)),
MIDGARD_MODEL(0x880, "T880", "T88x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)),
MODEL(0x6000, 0, "G71", "TMIx", NO_ANISO, 8192, 8192),
MODEL(0x6221, 0, "G72", "THEx", 0x0030 /* r0p3 */, 16384, 8192),
MODEL(0x7090, 0, "G51", "TSIx", 0x1010 /* r1p1 */, 8192, 8192),
MODEL(0x7093, 0, "G31", "TDVx", HAS_ANISO, 8192, 8192),
MODEL(0x7211, 0, "G76", "TNOx", HAS_ANISO, 16384, 8192),
MODEL(0x7212, 0, "G52", "TGOx", HAS_ANISO, 16384, 8192),
MODEL(0x7402, 0, "G52 r1", "TGOx", HAS_ANISO, 8192, 8192),
MODEL(0x9091, 0, "G57", "TNAx", HAS_ANISO, 16384, 8192),
MODEL(0x9093, 0, "G57", "TNAx", HAS_ANISO, 16384, 8192),
BIFROST_MODEL(0x6000, "G71", "TMIx", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192, 8192)),
BIFROST_MODEL(0x6201, "G72", "THEx", MODEL_ANISO(R0P3), MODEL_TB_SIZES(16384, 8192)),
BIFROST_MODEL(0x7000, "G51", "TSIx", MODEL_ANISO(R1P1), MODEL_TB_SIZES( 8192, 8192)),
BIFROST_MODEL(0x7003, "G31", "TDVx", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)),
BIFROST_MODEL(0x7201, "G76", "TNOx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
BIFROST_MODEL(0x7202, "G52", "TGOx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
BIFROST_MODEL(0x7402, "G52 r1", "TGOx", MODEL_ANISO(ALL), MODEL_TB_SIZES( 8192, 8192)),
MODEL(0xa867, 0, "G610", "TVIx", HAS_ANISO, 32768, 16384),
MODEL(0xac74, 0, "G310", "TVAx", HAS_ANISO, 16384, 8192),
MODEL(0xac74, 1, "G310", "TVAx", HAS_ANISO, 16384, 8192),
MODEL(0xac74, 2, "G310", "TVAx", HAS_ANISO, 16384, 8192),
MODEL(0xac74, 3, "G310", "TVAx", HAS_ANISO, 32768, 16384),
MODEL(0xac74, 4, "G310", "TVAx", HAS_ANISO, 32768, 16384),
VALHALL_MODEL(0x9001, 0, "G57", "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
VALHALL_MODEL(0x9003, 0, "G57", "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
VALHALL_MODEL(0xa807, 0, "G610", "TVIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384)),
VALHALL_MODEL(0xac04, 0, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
VALHALL_MODEL(0xac04, 1, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
VALHALL_MODEL(0xac04, 2, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384, 8192)),
VALHALL_MODEL(0xac04, 3, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384)),
VALHALL_MODEL(0xac04, 4, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384)),
MODEL(0xc870, 4, "G720", "TTIx", HAS_ANISO, 65536, 32768),
MODEL(0xd830, 4, "G725", "TKRx", HAS_ANISO, 65536, 65536),
AVALON_MODEL( 0xc800, 4, "G720", "TTIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 32768)),
AVALON_MODEL( 0xd800, 4, "G725", "TKRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536)),
};
/* clang-format on */
#undef NO_ANISO
#undef HAS_ANISO
#undef GPU_REV
#undef GPU_REV_NONE
#undef GPU_REV_ALL
#undef GPU_REV_R0P3
#undef GPU_REV_R1P1
#undef MIDGARD_MODEL
#undef BIFROST_MODEL
#undef VALHALL_MODEL
#undef AVALON_MODEL
#undef MODEL
#undef MODEL_ANISO
#undef MODEL_TB_SIZES
#undef MODEL_QUIRKS
/*
* Look up a supported model by its GPU ID, or return NULL if the model is not
* supported at this time.
@@ -95,7 +139,9 @@ const struct pan_model *
pan_get_model(uint32_t gpu_id, uint32_t gpu_variant)
{
for (unsigned i = 0; i < ARRAY_SIZE(pan_model_list); ++i) {
if (pan_model_list[i].gpu_id == gpu_id &&
uint32_t gpu_prod_id = gpu_id & pan_model_list[i].gpu_prod_id_mask;
if (pan_model_list[i].gpu_prod_id == gpu_prod_id &&
pan_model_list[i].gpu_variant == gpu_variant)
return &pan_model_list[i];
}
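
A sketch of the mask-based lookup, with IDs taken from the tables above: a G310 reports e.g. gpu_id = 0xac741000 (arch rev 7, r1p0), and the Valhall mask drops ARCH_REV and the version fields, so any revision matches the 0xac04 entry:

uint32_t mask = ARCH_MAJOR | ARCH_MINOR | PRODUCT_MAJOR; /* 0xff0f0000 */
uint32_t gpu_id = 0xac741000;              /* illustrative raw GPU ID */
bool matches = (gpu_id & mask) == (0xac04u << 16); /* true */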
@@ -156,7 +202,7 @@ pan_compute_max_thread_count(const struct pan_kmod_dev_props *props,
/* 4, 8 or 16 registers per shader on Midgard
* 32 or 64 registers per shader on Bifrost
*/
if (pan_arch(props->gpu_prod_id) <= 5) {
if (pan_arch(props->gpu_id) <= 5) {
aligned_reg_count = util_next_power_of_two(MAX2(work_reg_count, 4));
assert(aligned_reg_count <= 16);
} else {
@@ -181,7 +227,7 @@ pan_query_afbc(const struct pan_kmod_dev_props *props)
{
unsigned reg = props->afbc_features;
return (pan_arch(props->gpu_prod_id) >= 5) && (reg == 0);
return (pan_arch(props->gpu_id) >= 5) && (reg == 0);
}
/* Check for AFRC hardware support. AFRC is introduced in v10. Implementations
@@ -190,7 +236,7 @@ pan_query_afbc(const struct pan_kmod_dev_props *props)
bool
pan_query_afrc(const struct pan_kmod_dev_props *props)
{
return (pan_arch(props->gpu_prod_id) >= 10) &&
return (pan_arch(props->gpu_id) >= 10) &&
(props->texture_features[0] & (1 << 25));
}
@@ -207,10 +253,10 @@ pan_query_optimal_tib_size(const struct pan_model *model)
/* Preconditions ensure the returned value is a multiple of 1 KiB, the
* granularity of the colour buffer allocation field.
*/
assert(model->tilebuffer_size >= 2048);
assert(util_is_power_of_two_nonzero(model->tilebuffer_size));
assert(model->tilebuffer.color_size >= 2048);
assert(util_is_power_of_two_nonzero(model->tilebuffer.color_size));
return model->tilebuffer_size / 2;
return model->tilebuffer.color_size / 2;
}
unsigned
@@ -219,10 +265,10 @@ pan_query_optimal_z_tib_size(const struct pan_model *model)
/* Preconditions ensure the returned value is a multiple of 1 KiB, the
* granularity of the colour buffer allocation field.
*/
assert(model->tilebuffer_z_size >= 1024);
assert(util_is_power_of_two_nonzero(model->tilebuffer_z_size));
assert(model->tilebuffer.z_size >= 1024);
assert(util_is_power_of_two_nonzero(model->tilebuffer.z_size));
return model->tilebuffer_z_size / 2;
return model->tilebuffer.z_size / 2;
}
uint64_t

View file

@@ -45,9 +45,20 @@ struct pan_tiler_features {
unsigned max_levels;
};
#define ARCH_MAJOR BITFIELD_RANGE(28, 4)
#define ARCH_MINOR BITFIELD_RANGE(24, 4)
#define ARCH_REV BITFIELD_RANGE(20, 4)
#define PRODUCT_MAJOR BITFIELD_RANGE(16, 4)
#define VERSION_MAJOR BITFIELD_RANGE(12, 4)
#define VERSION_MINOR BITFIELD_RANGE(4, 8)
#define VERSION_STATUS BITFIELD_RANGE(0, 4)
struct pan_model {
/* GPU ID */
uint32_t gpu_id;
/* GPU product ID */
uint32_t gpu_prod_id;
/* Mask to apply to the GPU ID to get a product ID. */
uint32_t gpu_prod_id_mask;
/* GPU variant. */
uint32_t gpu_variant;
@@ -65,11 +76,13 @@
*/
uint32_t min_rev_anisotropic;
/* Default tilebuffer size in bytes for the model. */
unsigned tilebuffer_size;
struct {
/* Default tilebuffer size in bytes for the model. */
uint32_t color_size;
/* Default tilebuffer depth size in bytes for the model. */
unsigned tilebuffer_z_size;
/* Default tilebuffer depth size in bytes for the model. */
uint32_t z_size;
} tilebuffer;
struct {
/* The GPU lacks the capability for hierarchical tiling, without
@@ -116,7 +129,7 @@ unsigned pan_compute_max_thread_count(const struct pan_kmod_dev_props *props,
static inline unsigned
pan_arch(unsigned gpu_id)
{
switch (gpu_id) {
switch (gpu_id >> 16) {
case 0x600:
case 0x620:
case 0x720:
@@ -128,7 +141,7 @@ pan_arch(unsigned gpu_id)
case 0x880:
return 5;
default:
return gpu_id >> 12;
return gpu_id >> 28;
}
}
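
Worked examples for the reworked pan_arch(), which now takes the raw 32-bit ID (values derived from the model table above):

assert(pan_arch(0x07500000) == 5);  /* T760: listed Midgard product ID */
assert(pan_arch(0x72120000) == 7);  /* G52: arch major in bits [31:28] */
assert(pan_arch(0xac741000) == 10); /* G310: the lower bits don't matter */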

View file

@@ -73,7 +73,7 @@
static inline unsigned
midgard_get_quirks(unsigned gpu_id)
{
switch (gpu_id) {
switch (gpu_id >> 16) {
case 0x600:
return MIDGARD_OLD_BLEND | MIDGARD_BROKEN_BLEND_LOADS |
MIDGARD_BROKEN_LOD | MIDGARD_NO_UPPER_ALU | MIDGARD_NO_OOO |

View file

@@ -86,7 +86,7 @@ pan_perf_init(struct pan_perf *perf, int fd)
pan_kmod_dev_query_props(perf->dev, &props);
const struct pan_model *model =
pan_get_model(props.gpu_prod_id, props.gpu_variant);
pan_get_model(props.gpu_id, props.gpu_variant);
if (model == NULL)
unreachable("Invalid GPU ID");

View file

@@ -281,7 +281,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
nir_builder b = nir_builder_init_simple_shader(
MESA_SHADER_COMPUTE,
pan_shader_get_compiler_options(
pan_arch(phys_dev->kmod.props.gpu_prod_id)),
pan_arch(phys_dev->kmod.props.gpu_id)),
"%s", "desc_copy");
/* We actually customize that at execution time to issue the
@@ -295,7 +295,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
single_desc_copy(&b, desc_copy_id);
struct pan_compile_inputs inputs = {
.gpu_id = phys_dev->kmod.props.gpu_prod_id,
.gpu_id = phys_dev->kmod.props.gpu_id,
};
pan_shader_preprocess(b.shader, inputs.gpu_id);

View file

@@ -509,7 +509,7 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
subqueue);
pandecode_cs_binary(dev->debug.decode_ctx, qsubmit.stream_addr,
qsubmit.stream_size,
phys_dev->kmod.props.gpu_prod_id);
phys_dev->kmod.props.gpu_id);
}
return VK_SUCCESS;
@@ -1123,7 +1123,7 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit)
pandecode_user_msg(decode_ctx, "CS %d on subqueue %d binaries\n\n", i,
qsubmit->queue_index);
pandecode_cs_binary(decode_ctx, qsubmit->stream_addr,
qsubmit->stream_size, props->gpu_prod_id);
qsubmit->stream_size, props->gpu_id);
pandecode_user_msg(decode_ctx, "\n");
}
@@ -1148,7 +1148,7 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit)
uint64_t trace = queue->subqueues[i].tracebuf.addr.dev;
pandecode_user_msg(decode_ctx, "\nCS traces on subqueue %d\n\n", i);
pandecode_cs_trace(decode_ctx, trace, trace_size, props->gpu_prod_id);
pandecode_cs_trace(decode_ctx, trace, trace_size, props->gpu_id);
pandecode_user_msg(decode_ctx, "\n");
}
}

View file

@@ -80,7 +80,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
if (debug & PANVK_DEBUG_TRACE) {
pandecode_jc(dev->debug.decode_ctx, batch->vtc_jc.first_job,
phys_dev->kmod.props.gpu_prod_id);
phys_dev->kmod.props.gpu_id);
}
if (debug & PANVK_DEBUG_DUMP)
@@ -88,7 +88,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
if (debug & PANVK_DEBUG_SYNC)
pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
phys_dev->kmod.props.gpu_prod_id);
phys_dev->kmod.props.gpu_id);
}
if (batch->frag_jc.first_job) {
@@ -118,14 +118,14 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
if (debug & PANVK_DEBUG_TRACE)
pandecode_jc(dev->debug.decode_ctx, batch->frag_jc.first_job,
phys_dev->kmod.props.gpu_prod_id);
phys_dev->kmod.props.gpu_id);
if (debug & PANVK_DEBUG_DUMP)
pandecode_dump_mappings(dev->debug.decode_ctx);
if (debug & PANVK_DEBUG_SYNC)
pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
phys_dev->kmod.props.gpu_prod_id);
phys_dev->kmod.props.gpu_id);
}
if (debug & PANVK_DEBUG_TRACE)

View file

@@ -57,7 +57,7 @@ panvk_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount,
VK_FROM_HANDLE(panvk_device, device, _device);
const struct panvk_physical_device *phys_dev =
to_panvk_physical_device(device->vk.physical);
const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
VkResult result = VK_SUCCESS;
for (uint32_t i = 0; i < bindInfoCount; i++) {

View file

@@ -51,7 +51,7 @@ panvk_image_can_use_mod(struct panvk_image *image, uint64_t mod)
{
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(image->vk.base.device->physical);
unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
struct panvk_instance *instance =
to_panvk_instance(image->vk.base.device->physical->instance);
enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
@@ -250,7 +250,7 @@ panvk_image_init_layouts(struct panvk_image *image,
{
struct panvk_physical_device *phys_dev =
to_panvk_physical_device(image->vk.base.device->physical);
unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_info =
vk_find_struct_const(
pCreateInfo->pNext,

View file

@@ -184,7 +184,7 @@ static VkResult
get_device_sync_types(struct panvk_physical_device *device,
const struct panvk_instance *instance)
{
const unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
const unsigned arch = pan_arch(device->kmod.props.gpu_id);
uint32_t sync_type_count = 0;
device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd);
@@ -254,15 +254,15 @@ panvk_physical_device_init(struct panvk_physical_device *device,
pan_kmod_dev_query_props(device->kmod.dev, &device->kmod.props);
device->model = pan_get_model(device->kmod.props.gpu_prod_id,
device->model = pan_get_model(device->kmod.props.gpu_id,
device->kmod.props.gpu_variant);
unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
unsigned arch = pan_arch(device->kmod.props.gpu_id);
if (!device->model) {
result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
"Unknown gpu_id (%#x) or variant (%#x)",
device->kmod.props.gpu_prod_id,
device->kmod.props.gpu_id,
device->kmod.props.gpu_variant);
goto fail;
}
@@ -300,7 +300,7 @@ panvk_physical_device_init(struct panvk_physical_device *device,
memset(device->name, 0, sizeof(device->name));
sprintf(device->name, "%s", device->model->name);
if (get_cache_uuid(device->kmod.props.gpu_prod_id, device->cache_uuid)) {
if (get_cache_uuid(device->kmod.props.gpu_id, device->cache_uuid)) {
result = panvk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"cannot generate UUID");
goto fail;
@@ -397,7 +397,7 @@ panvk_GetPhysicalDeviceQueueFamilyProperties2(
VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
pQueueFamilyPropertyCount);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
{
@@ -464,7 +464,7 @@ panvk_CreateDevice(VkPhysicalDevice physicalDevice,
const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
{
VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
VkResult result = VK_ERROR_INITIALIZATION_FAILED;
panvk_arch_dispatch_ret(arch, create_device, result, physical_device,
@@ -479,7 +479,7 @@ panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
VK_FROM_HANDLE(panvk_device, device, _device);
struct panvk_physical_device *physical_device =
to_panvk_physical_device(device->vk.physical);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
panvk_arch_dispatch(arch, destroy_device, device, pAllocator);
}
@@ -535,7 +535,7 @@ get_image_plane_format_features(struct panvk_physical_device *physical_device,
VkFormatFeatureFlags2 features = 0;
enum pipe_format pfmt = vk_format_to_pipe_format(format);
const struct pan_format fmt = physical_device->formats.all[pfmt];
unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
if (!format_is_supported(physical_device, fmt, pfmt))
return 0;
@@ -592,7 +592,7 @@ get_image_format_features(struct panvk_physical_device *physical_device,
{
const struct vk_format_ycbcr_info *ycbcr_info =
vk_format_get_ycbcr_info(format);
const unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
const unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
/* TODO: Bifrost YCbCr support */
if (ycbcr_info && arch <= 7)
@@ -684,7 +684,7 @@ static VkFormatFeatureFlags2
get_image_format_sample_counts(struct panvk_physical_device *physical_device,
VkFormat format)
{
unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
unsigned max_tib_size = pan_get_max_tib_size(arch, physical_device->model);
unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size);
@@ -778,7 +778,7 @@ panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
static VkExtent3D
get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
{
const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
const uint64_t max_img_size_B =
arch <= 10 ? u_uintN_max(32) : u_uintN_max(48);
const enum pipe_format pfmt = vk_format_to_pipe_format(format);
@@ -801,7 +801,7 @@ get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
static VkExtent3D
get_max_3d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
{
const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
const uint64_t max_img_size_B =
arch <= 10 ? u_uintN_max(32) : u_uintN_max(48);
enum pipe_format pfmt = vk_format_to_pipe_format(format);

View file

@@ -86,7 +86,7 @@ get_blend_shader(struct panvk_device *dev,
/* Compile the NIR shader */
struct pan_compile_inputs inputs = {
.gpu_id = pdev->kmod.props.gpu_prod_id,
.gpu_id = pdev->kmod.props.gpu_id,
.is_blend = true,
.blend = {
.nr_samples = key.info.nr_samples,

View file

@@ -170,7 +170,7 @@ get_preload_shader(struct panvk_device *dev,
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
struct pan_compile_inputs inputs = {
.gpu_id = phys_dev->kmod.props.gpu_prod_id,
.gpu_id = phys_dev->kmod.props.gpu_id,
.is_blit = true,
};
@@ -522,11 +522,11 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
*/
struct panvk_physical_device *pdev =
to_panvk_physical_device(dev->vk.physical);
unsigned gpu_id = pdev->kmod.props.gpu_prod_id;
unsigned gpu_prod_id = pdev->kmod.props.gpu_id >> 16;
/* the PAN_ARCH check is redundant but allows compiler optimization
when PAN_ARCH <= 6 */
if (PAN_ARCH > 6 && gpu_id >= 0x7200)
if (PAN_ARCH > 6 && gpu_prod_id >= 0x7200)
fbinfo->bifrost.pre_post.modes[dcd_idx] =
MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS;
else

View file

@@ -549,7 +549,8 @@ panvk_per_arch(get_physical_device_properties)(
/* Collect arch_major, arch_minor, arch_rev and product_major,
* as done by the Arm driver.
*/
.deviceID = device->kmod.props.gpu_prod_id << 16,
.deviceID =
device->kmod.props.gpu_id & (ARCH_MAJOR | ARCH_MINOR | ARCH_REV | PRODUCT_MAJOR),
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
/* Vulkan 1.0 limits */
@@ -947,7 +948,7 @@ panvk_per_arch(get_physical_device_properties)(
uint8_t pad[8];
} dev_uuid = {
.vendor_id = ARM_VENDOR_ID,
.device_id = device->model->gpu_id,
.device_id = properties->deviceID,
};
STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);

View file

@@ -368,7 +368,7 @@ panvk_get_nir_options(UNUSED struct vk_physical_device *vk_pdev,
UNUSED const struct vk_pipeline_robustness_state *rs)
{
struct panvk_physical_device *phys_dev = to_panvk_physical_device(vk_pdev);
return pan_shader_get_compiler_options(pan_arch(phys_dev->kmod.props.gpu_prod_id));
return pan_shader_get_compiler_options(pan_arch(phys_dev->kmod.props.gpu_id));
}
static struct spirv_to_nir_options
@@ -1282,7 +1282,7 @@ panvk_compile_shader(struct panvk_device *dev,
shader->own_bin = true;
struct pan_compile_inputs inputs = {
.gpu_id = phys_dev->kmod.props.gpu_prod_id,
.gpu_id = phys_dev->kmod.props.gpu_id,
.view_mask = (state && state->rp) ? state->rp->view_mask : 0,
};