diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c
index e4cee9c452d..8ba732d9c9d 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -116,7 +116,7 @@ GENX(pan_blend_get_shader_locked)(struct pan_blend_shader_cache *cache,
 #else
    NIR_PASS(_, nir, pan_lower_framebuffer, rt_formats,
             pan_raw_format_mask_midgard(rt_formats), MAX2(key.nr_samples, 1),
-            cache->gpu_id < 0x700);
+            (cache->gpu_id >> 16) < 0x700);
 #endif
 
    struct util_dynarray binary;
diff --git a/src/gallium/drivers/panfrost/pan_device.c b/src/gallium/drivers/panfrost/pan_device.c
index f04717647c8..de100c16697 100644
--- a/src/gallium/drivers/panfrost/pan_device.c
+++ b/src/gallium/drivers/panfrost/pan_device.c
@@ -62,9 +62,9 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
 
    pan_kmod_dev_query_props(dev->kmod.dev, &dev->kmod.props);
 
-   dev->arch = pan_arch(dev->kmod.props.gpu_prod_id);
-   dev->model =
-      pan_get_model(dev->kmod.props.gpu_prod_id, dev->kmod.props.gpu_variant);
+   dev->arch = pan_arch(dev->kmod.props.gpu_id);
+   dev->model = pan_get_model(dev->kmod.props.gpu_id,
+                              dev->kmod.props.gpu_variant);
 
    /* If we don't recognize the model, bail early */
    if (!dev->model)
diff --git a/src/gallium/drivers/panfrost/pan_device.h b/src/gallium/drivers/panfrost/pan_device.h
index 949b7689886..b49087fef43 100644
--- a/src/gallium/drivers/panfrost/pan_device.h
+++ b/src/gallium/drivers/panfrost/pan_device.h
@@ -190,13 +190,19 @@ panfrost_device_fd(const struct panfrost_device *dev)
 static inline uint32_t
 panfrost_device_gpu_id(const struct panfrost_device *dev)
 {
-   return dev->kmod.props.gpu_prod_id;
+   return dev->kmod.props.gpu_id;
+}
+
+static inline uint32_t
+panfrost_device_gpu_prod_id(const struct panfrost_device *dev)
+{
+   return dev->kmod.props.gpu_id >> 16;
 }
 
 static inline uint32_t
 panfrost_device_gpu_rev(const struct panfrost_device *dev)
 {
-   return dev->kmod.props.gpu_revision;
+   return dev->kmod.props.gpu_id & BITFIELD_MASK(16);
 }
 
 static inline int
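
With gpu_prod_id and gpu_revision folded into a single 32-bit gpu_id, the two
helpers above reduce to a shift and a mask (BITFIELD_MASK(16) is 0xffff). A
standalone sketch of the round trip, outside the patch, using a Mali-G52
product ID (0x7212) and an arbitrary revision as example values:

   #include <assert.h>
   #include <stdint.h>

   int
   main(void)
   {
      /* Pack the way the kmod backends do later in this patch: product ID
       * in the top 16 bits, revision in the low 16 bits. */
      uint32_t gpu_id = ((uint32_t)0x7212 << 16) | 0x0010;

      assert((gpu_id >> 16) == 0x7212);    /* panfrost_device_gpu_prod_id() */
      assert((gpu_id & 0xffff) == 0x0010); /* panfrost_device_gpu_rev() */
      return 0;
   }
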
diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c
index b07b4431943..8472ae88da0 100644
--- a/src/gallium/drivers/panfrost/pan_fb_preload.c
+++ b/src/gallium/drivers/panfrost/pan_fb_preload.c
@@ -1274,7 +1274,7 @@ pan_preload_emit_pre_frame_dcd(struct pan_fb_preload_cache *cache,
     * The PAN_ARCH check is redundant but allows the compiler to optimize
     * when PAN_ARCH < 7.
     */
-   if (PAN_ARCH >= 7 && cache->gpu_id >= 0x7200)
+   if (PAN_ARCH >= 7 && (cache->gpu_id >> 16) >= 0x7200)
       fb->bifrost.pre_post.modes[dcd_idx] =
          MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS;
    else
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index 566f716c04b..97519de0d13 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -154,7 +154,7 @@ get_max_msaa(struct panfrost_device *dev, enum pipe_format format)
    /* t760 (GPU ID 0x750 - not a typo) has a HW issue in versions before
    * the r1p0 version, which prevents 16x MSAA from working properly.
    */
-   if (panfrost_device_gpu_id(dev) == 0x750 &&
+   if (panfrost_device_gpu_prod_id(dev) == 0x750 &&
       panfrost_device_gpu_rev(dev) < 0x1000)
      max_msaa = MIN2(max_msaa, 8);
 
@@ -990,7 +990,7 @@ panfrost_create_screen(int fd, const struct pipe_screen_config *config,
    /* Bail early on unsupported hardware */
    if (dev->model == NULL) {
      debug_printf("panfrost: Unsupported model %X",
-                   panfrost_device_gpu_id(dev));
+                   panfrost_device_gpu_prod_id(dev));
      panfrost_destroy_screen(&(screen->base));
      return NULL;
    }
diff --git a/src/gallium/drivers/panfrost/pan_shader.c b/src/gallium/drivers/panfrost/pan_shader.c
index 816afbb34df..1202fbd1689 100644
--- a/src/gallium/drivers/panfrost/pan_shader.c
+++ b/src/gallium/drivers/panfrost/pan_shader.c
@@ -197,7 +197,7 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
    if (dev->arch <= 5 && s->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, s, pan_lower_framebuffer, key->fs.rt_formats,
               pan_raw_format_mask_midgard(key->fs.rt_formats), 0,
-               panfrost_device_gpu_id(dev) < 0x700);
+               panfrost_device_gpu_prod_id(dev) < 0x700);
    }
 
    if (s->info.stage == MESA_SHADER_VERTEX)
diff --git a/src/panfrost/clc/pan_compile.c b/src/panfrost/clc/pan_compile.c
index 9c5aa21b4f1..fe501fa8972 100644
--- a/src/panfrost/clc/pan_compile.c
+++ b/src/panfrost/clc/pan_compile.c
@@ -265,7 +265,7 @@ main(int argc, const char **argv)
    const char *output_h_path = argv[4];
    const char *output_c_path = argv[5];
 
-   int target_arch = atoi(target_arch_str);
+   unsigned target_arch = atoi(target_arch_str);
 
    if (target_arch < 4 || target_arch > 13) {
       fprintf(stderr, "Unsupported target arch %d\n", target_arch);
@@ -337,8 +337,10 @@ main(int argc, const char **argv)
       libfunc, v, get_compiler_options(target_arch), &opt,
       load_kernel_input);
 
+   unsigned gpu_prod_id = (target_arch & 0xf) << 12;
+
    struct pan_compile_inputs inputs = {
-      .gpu_id = target_arch << 12,
+      .gpu_id = gpu_prod_id << 16,
    };
 
    nir_link_shader_functions(s, nir);
diff --git a/src/panfrost/compiler/bi_quirks.h b/src/panfrost/compiler/bi_quirks.h
index be05ed51a9c..81bd0abbcc7 100644
--- a/src/panfrost/compiler/bi_quirks.h
+++ b/src/panfrost/compiler/bi_quirks.h
@@ -42,9 +42,9 @@
 #define BIFROST_LIMITED_CLPER (1 << 1)
 
 static inline unsigned
-bifrost_get_quirks(unsigned product_id)
+bifrost_get_quirks(unsigned gpu_id)
 {
-   switch (product_id >> 8) {
+   switch (gpu_id >> 24) {
    case 0x60: /* G71 */
       return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER;
    case 0x62: /* G72 */
diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c
index 3534a041eac..2de6a9bbdd0 100644
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@@ -5441,7 +5441,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
    }
 
    /* This opt currently helps on Bifrost but not Valhall */
-   if (gpu_id < 0x9000)
+   if (pan_arch(gpu_id) < 9)
       NIR_PASS(progress, nir, bifrost_nir_opt_boolean_bitwise);
 
    NIR_PASS(progress, nir, nir_lower_alu_to_scalar, bi_scalarize_filter, NULL);
@@ -5849,8 +5849,8 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
    * scratch access.
    */
   glsl_type_size_align_func vars_to_scratch_size_align_func =
-      (gpu_id >= 0x9000) ? glsl_get_vec4_size_align_bytes
-                         : glsl_get_natural_size_align_bytes;
+      (pan_arch(gpu_id) >= 9) ? glsl_get_vec4_size_align_bytes
+                              : glsl_get_natural_size_align_bytes;
   /* Lower large arrays to scratch and small arrays to bcsel */
   NIR_PASS(_, nir, nir_lower_scratch_to_var);
   NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
@@ -5884,7 +5884,7 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
       NIR_PASS(_, nir, bifrost_nir_lower_load_output);
    } else if (nir->info.stage == MESA_SHADER_VERTEX) {
-      if (gpu_id >= 0x9000) {
+      if (pan_arch(gpu_id) >= 9) {
         NIR_PASS(_, nir, nir_lower_mediump_io, nir_var_shader_out,
                  VARYING_BIT_PSIZ, false);
      }
 
@@ -6377,7 +6377,7 @@ bi_should_idvs(nir_shader *nir, const struct pan_compile_inputs *inputs)
      return false;
 
    /* Bifrost cannot write gl_PointSize during IDVS */
-   if ((inputs->gpu_id < 0x9000) &&
+   if ((pan_arch(inputs->gpu_id) < 9) &&
       nir->info.outputs_written & VARYING_BIT_PSIZ)
      return false;
 
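
The compiler-side checks above stop comparing raw IDs ("gpu_id < 0x9000") and
ask pan_arch() instead, which works both for real IDs and for the synthetic
ones pan_compile.c builds from a bare architecture number. A standalone
sketch of why the two agree (arch_of() is our stand-in for the default case
of pan_arch(), not a function from the tree):

   #include <assert.h>
   #include <stdint.h>

   /* Stand-in for pan_arch()'s default case: the arch_major nibble. */
   static unsigned
   arch_of(uint32_t gpu_id)
   {
      return gpu_id >> 28;
   }

   int
   main(void)
   {
      /* Synthetic ID built by pan_compile.c for target_arch == 9:
       * (9 & 0xf) << 12 == 0x9000, then shifted into the top half. */
      uint32_t synthetic = (uint32_t)0x9000 << 16;
      /* A real Valhall product ID (Mali-G57). */
      uint32_t real = (uint32_t)0x9003 << 16;

      assert(arch_of(synthetic) == 9);
      assert(arch_of(real) == 9);
      return 0;
   }
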
diff --git a/src/panfrost/compiler/cmdline.c b/src/panfrost/compiler/cmdline.c
index 4e6eb6632ee..9982bfc6755 100644
--- a/src/panfrost/compiler/cmdline.c
+++ b/src/panfrost/compiler/cmdline.c
@@ -29,8 +29,9 @@
 #include "bifrost/disassemble.h"
 #include "util/macros.h"
 #include "valhall/disassemble.h"
+#include "panfrost/lib/pan_props.h"
 
-unsigned gpu_id = 0x7212;
+unsigned gpu_id = 0x72120000;
 int verbose = 0;
 
 #define BI_FOURCC(ch0, ch1, ch2, ch3) \
@@ -70,7 +71,7 @@ disassemble(const char *filename)
       }
    }
 
-   if ((gpu_id >> 12) >= 9)
+   if (pan_arch(gpu_id) >= 9)
      disassemble_valhall(stdout, entrypoint, filesize, verbose);
    else
      disassemble_bifrost(stdout, entrypoint, filesize, verbose);
@@ -115,6 +116,7 @@ main(int argc, char **argv)
            return 1;
         }
 
+        gpu_id <<= 16;
         break;
      case 'g':
         gpu_id = 0;
diff --git a/src/panfrost/lib/kmod/pan_kmod.h b/src/panfrost/lib/kmod/pan_kmod.h
index cff86351fd3..c15b2650740 100644
--- a/src/panfrost/lib/kmod/pan_kmod.h
+++ b/src/panfrost/lib/kmod/pan_kmod.h
@@ -149,11 +149,8 @@ struct pan_kmod_bo {
 
 /* List of GPU properties needed by the UMD. */
 struct pan_kmod_dev_props {
-   /* GPU product ID. */
-   uint32_t gpu_prod_id;
-
-   /* GPU revision. */
-   uint32_t gpu_revision;
+   /* GPU ID. */
+   uint32_t gpu_id;
 
    /* GPU variant. */
    uint32_t gpu_variant;
diff --git a/src/panfrost/lib/kmod/panfrost_kmod.c b/src/panfrost/lib/kmod/panfrost_kmod.c
index 1dbac0bb8b5..2df3050984a 100644
--- a/src/panfrost/lib/kmod/panfrost_kmod.c
+++ b/src/panfrost/lib/kmod/panfrost_kmod.c
@@ -105,7 +105,7 @@ panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev,
    props->max_threads_per_core =
       panfrost_query_raw(fd, DRM_PANFROST_PARAM_MAX_THREADS, true, 0);
    if (!props->max_threads_per_core) {
-      switch (pan_arch(props->gpu_prod_id)) {
+      switch (pan_arch(props->gpu_id)) {
       case 4:
       case 5:
          props->max_threads_per_core = 256;
@@ -141,7 +141,7 @@ panfrost_dev_query_thread_props(const struct pan_kmod_dev *dev,
    props->max_tasks_per_core = MAX2(thread_features >> 24, 1);
    props->num_registers_per_core = thread_features & 0xffff;
    if (!props->num_registers_per_core) {
-      switch (pan_arch(props->gpu_prod_id)) {
+      switch (pan_arch(props->gpu_id)) {
       case 4:
       case 5:
          /* Assume we can always schedule max_threads_per_core when using 4
@@ -183,9 +183,8 @@ panfrost_dev_query_props(const struct pan_kmod_dev *dev,
    int fd = dev->fd;
 
    memset(props, 0, sizeof(*props));
-   props->gpu_prod_id =
-      panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0);
-   props->gpu_revision =
+   props->gpu_id =
+      (panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0) << 16) |
       panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_REVISION, true, 0);
    props->shader_present =
       panfrost_query_raw(fd, DRM_PANFROST_PARAM_SHADER_PRESENT, true, 0);
diff --git a/src/panfrost/lib/kmod/panthor_kmod.c b/src/panfrost/lib/kmod/panthor_kmod.c
index e43622c5972..95c17f8f825 100644
--- a/src/panfrost/lib/kmod/panthor_kmod.c
+++ b/src/panfrost/lib/kmod/panthor_kmod.c
@@ -266,8 +266,7 @@ panthor_dev_query_props(const struct pan_kmod_dev *dev,
       container_of(dev, struct panthor_kmod_dev, base);
 
    *props = (struct pan_kmod_dev_props){
-      .gpu_prod_id = panthor_dev->props.gpu.gpu_id >> 16,
-      .gpu_revision = panthor_dev->props.gpu.gpu_id & 0xffff,
+      .gpu_id = panthor_dev->props.gpu.gpu_id,
       .gpu_variant = panthor_dev->props.gpu.core_features & 0xff,
       .shader_present = panthor_dev->props.gpu.shader_present,
       .tiler_features = panthor_dev->props.gpu.tiler_features,
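
After this change both kmod backends hand the UMD the same packed value:
panthor forwards the hardware GPU_ID register untouched, while panfrost,
whose UAPI exposes the product ID and revision as separate queries,
reassembles it. A standalone sketch of that reassembly (pack_gpu_id() is a
hypothetical helper named here only for illustration):

   #include <assert.h>
   #include <stdint.h>

   /* What panfrost_dev_query_props() now does with its two queries. */
   static uint32_t
   pack_gpu_id(uint32_t prod_id, uint32_t revision)
   {
      return (prod_id << 16) | (revision & 0xffff);
   }

   int
   main(void)
   {
      /* e.g. a T760 r1p0: GPU_PROD_ID = 0x750, GPU_REVISION = 0x1000. */
      assert(pack_gpu_id(0x750, 0x1000) == 0x07501000);
      return 0;
   }
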
diff --git a/src/panfrost/lib/pan_props.c b/src/panfrost/lib/pan_props.c
index 410c8a6aeb4..4f9e070bda7 100644
--- a/src/panfrost/lib/pan_props.c
+++ b/src/panfrost/lib/pan_props.c
@@ -32,61 +32,105 @@
 
 #include
 
-/* Fixed "minimum revisions" */
-#define NO_ANISO  (~0)
-#define HAS_ANISO (0)
+/* GPU revision (rXpY) */
+#define GPU_REV(X, Y) (((X) & 0xf) << 12 | ((Y) & 0xff) << 4)
 
-#define MODEL(gpu_id_, gpu_variant_, shortname, counters_,                    \
-              min_rev_anisotropic_, tib_size_, tib_z_size_, ...)              \
+/* Fixed "minimum revisions" */
+#define GPU_REV_NONE (~0)
+#define GPU_REV_ALL  GPU_REV(0, 0)
+#define GPU_REV_R0P3 GPU_REV(0, 3)
+#define GPU_REV_R1P1 GPU_REV(1, 1)
+
+#define MODEL(gpu_prod_id_, gpu_prod_id_mask_, gpu_variant_, shortname,       \
+              counters, ...)                                                  \
    {                                                                          \
-      .gpu_id = gpu_id_, .gpu_variant = gpu_variant_,                         \
+      .gpu_prod_id = gpu_prod_id_,                                            \
+      .gpu_prod_id_mask = gpu_prod_id_mask_,                                  \
+      .gpu_variant = gpu_variant_,                                            \
       .name = "Mali-" shortname,                                              \
-      .performance_counters = counters_,                                      \
-      .min_rev_anisotropic = min_rev_anisotropic_,                            \
-      .tilebuffer_size = tib_size_,                                           \
-      .tilebuffer_z_size = tib_z_size_,                                       \
-      .quirks = { __VA_ARGS__ },                                              \
+      .performance_counters = counters,                                       \
+      ##__VA_ARGS__,                                                          \
    }
 
+#define MIDGARD_MODEL(gpu_prod_id, shortname, counters, ...)                  \
+   MODEL(gpu_prod_id << 16, 0xffff0000, 0, shortname, counters, ##__VA_ARGS__)
+
+#define BIFROST_MODEL(gpu_prod_id, shortname, counters, ...)                  \
+   MODEL(gpu_prod_id << 16, ARCH_MAJOR | ARCH_MINOR | PRODUCT_MAJOR, 0,       \
+         shortname, counters, ##__VA_ARGS__)
+
+#define VALHALL_MODEL(gpu_prod_id, gpu_variant, shortname, counters, ...)     \
+   MODEL(gpu_prod_id << 16, ARCH_MAJOR | ARCH_MINOR | PRODUCT_MAJOR,          \
+         gpu_variant, shortname, counters, ##__VA_ARGS__)
+
+#define AVALON_MODEL(gpu_prod_id, gpu_variant, shortname, counters, ...)      \
+   MODEL(gpu_prod_id << 16, ARCH_MAJOR | ARCH_MINOR | PRODUCT_MAJOR,          \
+         gpu_variant, shortname, counters, ##__VA_ARGS__)
+
+#define MODEL_ANISO(rev) .min_rev_anisotropic = GPU_REV_##rev
+
+#define MODEL_TB_SIZES(color_tb_size, z_tb_size)                              \
+   .tilebuffer = {                                                            \
+      .color_size = color_tb_size,                                            \
+      .z_size = z_tb_size,                                                    \
+   }
+
+#define MODEL_QUIRKS(...) .quirks = {__VA_ARGS__}
+
 /* Table of supported Mali GPUs */
 /* clang-format off */
 const struct pan_model pan_model_list[] = {
-   MODEL(0x600, 0, "T600", "T60x", NO_ANISO, 8192, 8192, .max_4x_msaa = true),
-   MODEL(0x620, 0, "T620", "T62x", NO_ANISO, 8192, 8192),
-   MODEL(0x720, 0, "T720", "T72x", NO_ANISO, 8192, 8192, .no_hierarchical_tiling = true, .max_4x_msaa = true),
-   MODEL(0x750, 0, "T760", "T76x", NO_ANISO, 8192, 8192),
-   MODEL(0x820, 0, "T820", "T82x", NO_ANISO, 8192, 8192, .no_hierarchical_tiling = true, .max_4x_msaa = true),
-   MODEL(0x830, 0, "T830", "T83x", NO_ANISO, 8192, 8192, .no_hierarchical_tiling = true, .max_4x_msaa = true),
-   MODEL(0x860, 0, "T860", "T86x", NO_ANISO, 8192, 8192),
-   MODEL(0x880, 0, "T880", "T88x", NO_ANISO, 8192, 8192),
+   MIDGARD_MODEL(0x600, "T600", "T60x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192,  8192),
+                 MODEL_QUIRKS( .max_4x_msaa = true )),
+   MIDGARD_MODEL(0x620, "T620", "T62x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192,  8192)),
+   MIDGARD_MODEL(0x720, "T720", "T72x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192,  8192),
+                 MODEL_QUIRKS( .no_hierarchical_tiling = true, .max_4x_msaa = true )),
+   MIDGARD_MODEL(0x750, "T760", "T76x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192,  8192)),
+   MIDGARD_MODEL(0x820, "T820", "T82x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192,  8192),
+                 MODEL_QUIRKS( .no_hierarchical_tiling = true, .max_4x_msaa = true )),
+   MIDGARD_MODEL(0x830, "T830", "T83x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192,  8192),
+                 MODEL_QUIRKS( .no_hierarchical_tiling = true, .max_4x_msaa = true )),
+   MIDGARD_MODEL(0x860, "T860", "T86x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192,  8192)),
+   MIDGARD_MODEL(0x880, "T880", "T88x", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192,  8192)),
 
-   MODEL(0x6000, 0, "G71",    "TMIx", NO_ANISO,           8192, 8192),
-   MODEL(0x6221, 0, "G72",    "THEx", 0x0030 /* r0p3 */, 16384, 8192),
-   MODEL(0x7090, 0, "G51",    "TSIx", 0x1010 /* r1p1 */,  8192, 8192),
-   MODEL(0x7093, 0, "G31",    "TDVx", HAS_ANISO,          8192, 8192),
-   MODEL(0x7211, 0, "G76",    "TNOx", HAS_ANISO,         16384, 8192),
-   MODEL(0x7212, 0, "G52",    "TGOx", HAS_ANISO,         16384, 8192),
-   MODEL(0x7402, 0, "G52 r1", "TGOx", HAS_ANISO,          8192, 8192),
-   MODEL(0x9091, 0, "G57",    "TNAx", HAS_ANISO,         16384, 8192),
-   MODEL(0x9093, 0, "G57",    "TNAx", HAS_ANISO,         16384, 8192),
+   BIFROST_MODEL(0x6000, "G71",    "TMIx", MODEL_ANISO(NONE), MODEL_TB_SIZES( 8192,  8192)),
+   BIFROST_MODEL(0x6201, "G72",    "THEx", MODEL_ANISO(R0P3), MODEL_TB_SIZES(16384,  8192)),
+   BIFROST_MODEL(0x7000, "G51",    "TSIx", MODEL_ANISO(R1P1), MODEL_TB_SIZES( 8192,  8192)),
+   BIFROST_MODEL(0x7003, "G31",    "TDVx", MODEL_ANISO(ALL),  MODEL_TB_SIZES( 8192,  8192)),
+   BIFROST_MODEL(0x7201, "G76",    "TNOx", MODEL_ANISO(ALL),  MODEL_TB_SIZES(16384,  8192)),
+   BIFROST_MODEL(0x7202, "G52",    "TGOx", MODEL_ANISO(ALL),  MODEL_TB_SIZES(16384,  8192)),
+   BIFROST_MODEL(0x7402, "G52 r1", "TGOx", MODEL_ANISO(ALL),  MODEL_TB_SIZES( 8192,  8192)),
 
-   MODEL(0xa867, 0, "G610", "TVIx", HAS_ANISO, 32768, 16384),
-   MODEL(0xac74, 0, "G310", "TVAx", HAS_ANISO, 16384, 8192),
-   MODEL(0xac74, 1, "G310", "TVAx", HAS_ANISO, 16384, 8192),
-   MODEL(0xac74, 2, "G310", "TVAx", HAS_ANISO, 16384, 8192),
-   MODEL(0xac74, 3, "G310", "TVAx", HAS_ANISO, 32768, 16384),
-   MODEL(0xac74, 4, "G310", "TVAx", HAS_ANISO, 32768, 16384),
+   VALHALL_MODEL(0x9001, 0, "G57",  "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384,  8192)),
+   VALHALL_MODEL(0x9003, 0, "G57",  "TNAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384,  8192)),
+   VALHALL_MODEL(0xa807, 0, "G610", "TVIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384)),
+   VALHALL_MODEL(0xac04, 0, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384,  8192)),
+   VALHALL_MODEL(0xac04, 1, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384,  8192)),
+   VALHALL_MODEL(0xac04, 2, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(16384,  8192)),
+   VALHALL_MODEL(0xac04, 3, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384)),
+   VALHALL_MODEL(0xac04, 4, "G310", "TVAx", MODEL_ANISO(ALL), MODEL_TB_SIZES(32768, 16384)),
 
-   MODEL(0xc870, 4, "G720", "TTIx", HAS_ANISO, 65536, 32768),
-
-   MODEL(0xd830, 4, "G725", "TKRx", HAS_ANISO, 65536, 65536),
+   AVALON_MODEL( 0xc800, 4, "G720", "TTIx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 32768)),
+   AVALON_MODEL( 0xd800, 4, "G725", "TKRx", MODEL_ANISO(ALL), MODEL_TB_SIZES(65536, 65536)),
 };
 /* clang-format on */
 
-#undef NO_ANISO
-#undef HAS_ANISO
+#undef GPU_REV
+#undef GPU_REV_NONE
+#undef GPU_REV_ALL
+#undef GPU_REV_R0P3
+#undef GPU_REV_R1P1
+
+#undef MIDGARD_MODEL
+#undef BIFROST_MODEL
+#undef VALHALL_MODEL
+#undef AVALON_MODEL
 #undef MODEL
+#undef MODEL_ANISO
+#undef MODEL_TB_SIZES
+#undef MODEL_QUIRKS
+
 /*
  * Look up a supported model by its GPU ID, or return NULL if the model is not
  * supported at this time.
@@ -95,7 +139,9 @@ const struct pan_model *
 pan_get_model(uint32_t gpu_id, uint32_t gpu_variant)
 {
    for (unsigned i = 0; i < ARRAY_SIZE(pan_model_list); ++i) {
-      if (pan_model_list[i].gpu_id == gpu_id &&
+      uint32_t gpu_prod_id = gpu_id & pan_model_list[i].gpu_prod_id_mask;
+
+      if (pan_model_list[i].gpu_prod_id == gpu_prod_id &&
           pan_model_list[i].gpu_variant == gpu_variant)
          return &pan_model_list[i];
    }
@@ -156,7 +202,7 @@ pan_compute_max_thread_count(const struct pan_kmod_dev_props *props,
    /* 4, 8 or 16 registers per shader on Midgard
     * 32 or 64 registers per shader on Bifrost
     */
-   if (pan_arch(props->gpu_prod_id) <= 5) {
+   if (pan_arch(props->gpu_id) <= 5) {
       aligned_reg_count = util_next_power_of_two(MAX2(work_reg_count, 4));
       assert(aligned_reg_count <= 16);
    } else {
@@ -181,7 +227,7 @@ pan_query_afbc(const struct pan_kmod_dev_props *props)
 {
    unsigned reg = props->afbc_features;
 
-   return (pan_arch(props->gpu_prod_id) >= 5) && (reg == 0);
+   return (pan_arch(props->gpu_id) >= 5) && (reg == 0);
 }
 
 /* Check for AFRC hardware support. AFRC is introduced in v10. Implementations
@@ -190,7 +236,7 @@ pan_query_afbc(const struct pan_kmod_dev_props *props)
 bool
 pan_query_afrc(const struct pan_kmod_dev_props *props)
 {
-   return (pan_arch(props->gpu_prod_id) >= 10) &&
+   return (pan_arch(props->gpu_id) >= 10) &&
           (props->texture_features[0] & (1 << 25));
 }
 
@@ -207,10 +253,10 @@
    /* Preconditions ensure the returned value is a multiple of 1 KiB, the
     * granularity of the colour buffer allocation field.
     */
-   assert(model->tilebuffer_size >= 2048);
-   assert(util_is_power_of_two_nonzero(model->tilebuffer_size));
+   assert(model->tilebuffer.color_size >= 2048);
+   assert(util_is_power_of_two_nonzero(model->tilebuffer.color_size));
 
-   return model->tilebuffer_size / 2;
+   return model->tilebuffer.color_size / 2;
 }
 
 unsigned
@@ -219,10 +265,10 @@ pan_query_optimal_z_tib_size(const struct pan_model *model)
    /* Preconditions ensure the returned value is a multiple of 1 KiB, the
     * granularity of the colour buffer allocation field.
     */
-   assert(model->tilebuffer_z_size >= 1024);
-   assert(util_is_power_of_two_nonzero(model->tilebuffer_z_size));
+   assert(model->tilebuffer.z_size >= 1024);
+   assert(util_is_power_of_two_nonzero(model->tilebuffer.z_size));
 
-   return model->tilebuffer_z_size / 2;
+   return model->tilebuffer.z_size / 2;
 }
 
 uint64_t
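
pan_get_model() now masks the probed ID before comparing, so a single table
entry can cover every revision of a product. A standalone sketch of the
masking for a Mali-G52 (Bifrost entries keep ARCH_MAJOR, ARCH_MINOR and
PRODUCT_MAJOR and drop ARCH_REV plus the rXpY version bits):

   #include <assert.h>
   #include <stdint.h>

   int
   main(void)
   {
      /* ARCH_MAJOR | ARCH_MINOR | PRODUCT_MAJOR from pan_props.h:
       * bits 28-31, 24-27 and 16-19, i.e. 0xff0f0000. */
      uint32_t mask = 0xf0000000 | 0x0f000000 | 0x000f0000;

      /* A G52 reporting product ID 0x7212, any revision. */
      uint32_t gpu_id = ((uint32_t)0x7212 << 16) | 0x1234;

      /* ARCH_REV (the '1' in 0x7212) is masked off, so this matches the
       * BIFROST_MODEL(0x7202, "G52", ...) entry above. */
      assert((gpu_id & mask) == (uint32_t)0x7202 << 16);
      return 0;
   }
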
diff --git a/src/panfrost/lib/pan_props.h b/src/panfrost/lib/pan_props.h
index bef13f3b48f..b7c682bfc86 100644
--- a/src/panfrost/lib/pan_props.h
+++ b/src/panfrost/lib/pan_props.h
@@ -45,9 +45,20 @@ struct pan_tiler_features {
    unsigned max_levels;
 };
 
+#define ARCH_MAJOR     BITFIELD_RANGE(28, 4)
+#define ARCH_MINOR     BITFIELD_RANGE(24, 4)
+#define ARCH_REV       BITFIELD_RANGE(20, 4)
+#define PRODUCT_MAJOR  BITFIELD_RANGE(16, 4)
+#define VERSION_MAJOR  BITFIELD_RANGE(12, 4)
+#define VERSION_MINOR  BITFIELD_RANGE(4, 8)
+#define VERSION_STATUS BITFIELD_RANGE(0, 4)
+
 struct pan_model {
-   /* GPU ID */
-   uint32_t gpu_id;
+   /* GPU product ID */
+   uint32_t gpu_prod_id;
+
+   /* Mask to apply to the GPU ID to get a product ID. */
+   uint32_t gpu_prod_id_mask;
 
    /* GPU variant. */
    uint32_t gpu_variant;
@@ -65,11 +76,13 @@ struct pan_model {
    */
    uint32_t min_rev_anisotropic;
 
-   /* Default tilebuffer size in bytes for the model. */
-   unsigned tilebuffer_size;
+   struct {
+      /* Default tilebuffer size in bytes for the model. */
+      uint32_t color_size;
 
-   /* Default tilebuffer depth size in bytes for the model. */
-   unsigned tilebuffer_z_size;
+      /* Default tilebuffer depth size in bytes for the model. */
+      uint32_t z_size;
+   } tilebuffer;
 
    struct {
       /* The GPU lacks the capability for hierarchical tiling, without
@@ -116,7 +129,7 @@ unsigned pan_compute_max_thread_count(const struct pan_kmod_dev_props *props,
 static inline unsigned
 pan_arch(unsigned gpu_id)
 {
-   switch (gpu_id) {
+   switch (gpu_id >> 16) {
    case 0x600:
    case 0x620:
    case 0x720:
@@ -128,7 +141,7 @@ pan_arch(unsigned gpu_id)
    case 0x880:
       return 5;
    default:
-      return gpu_id >> 12;
+      return gpu_id >> 28;
    }
 }
 
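
The new bit-field names in pan_props.h follow the layout of the hardware
GPU_ID register, and pan_arch() only special-cases the legacy Midgard product
codes (0x600-0x880 in the top half), which predate this scheme; everything
newer is simply arch_major. A standalone decode of one concrete value
(Mali-G610, product ID 0xa867):

   #include <assert.h>
   #include <stdint.h>

   int
   main(void)
   {
      /* Mali-G610: arch 10.8.6, product major 7, revision bits zeroed. */
      uint32_t gpu_id = (uint32_t)0xa867 << 16;

      assert(((gpu_id & 0xf0000000) >> 28) == 0xa); /* ARCH_MAJOR */
      assert(((gpu_id & 0x0f000000) >> 24) == 0x8); /* ARCH_MINOR */
      assert(((gpu_id & 0x00f00000) >> 20) == 0x6); /* ARCH_REV */
      assert(((gpu_id & 0x000f0000) >> 16) == 0x7); /* PRODUCT_MAJOR */

      /* pan_arch(): not a Midgard product code, so arch == arch_major. */
      assert((gpu_id >> 28) == 10);
      return 0;
   }
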
diff --git a/src/panfrost/midgard/midgard_quirks.h b/src/panfrost/midgard/midgard_quirks.h
index fd7f797e04b..ae0acea3f8c 100644
--- a/src/panfrost/midgard/midgard_quirks.h
+++ b/src/panfrost/midgard/midgard_quirks.h
@@ -73,7 +73,7 @@
 static inline unsigned
 midgard_get_quirks(unsigned gpu_id)
 {
-   switch (gpu_id) {
+   switch (gpu_id >> 16) {
    case 0x600:
       return MIDGARD_OLD_BLEND | MIDGARD_BROKEN_BLEND_LOADS |
              MIDGARD_BROKEN_LOD | MIDGARD_NO_UPPER_ALU | MIDGARD_NO_OOO |
diff --git a/src/panfrost/perf/pan_perf.c b/src/panfrost/perf/pan_perf.c
index 636c43c0184..1fc994669ac 100644
--- a/src/panfrost/perf/pan_perf.c
+++ b/src/panfrost/perf/pan_perf.c
@@ -86,7 +86,7 @@ pan_perf_init(struct pan_perf *perf, int fd)
    pan_kmod_dev_query_props(perf->dev, &props);
 
    const struct pan_model *model =
-      pan_get_model(props.gpu_prod_id, props.gpu_variant);
+      pan_get_model(props.gpu_id, props.gpu_variant);
 
    if (model == NULL)
       unreachable("Invalid GPU ID");
diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c
index 1bf0b0696d0..dce9139b290 100644
--- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c
+++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c
@@ -281,7 +281,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
    nir_builder b = nir_builder_init_simple_shader(
       MESA_SHADER_COMPUTE,
       pan_shader_get_compiler_options(
-         pan_arch(phys_dev->kmod.props.gpu_prod_id)),
+         pan_arch(phys_dev->kmod.props.gpu_id)),
       "%s", "desc_copy");
 
    /* We actually customize that at execution time to issue the
@@ -295,7 +295,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev)
    single_desc_copy(&b, desc_copy_id);
 
    struct pan_compile_inputs inputs = {
-      .gpu_id = phys_dev->kmod.props.gpu_prod_id,
+      .gpu_id = phys_dev->kmod.props.gpu_id,
    };
 
    pan_shader_preprocess(b.shader, inputs.gpu_id);
diff --git a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c
index 98601f00ea4..4160c33b128 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c
@@ -509,7 +509,7 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue)
                          subqueue);
       pandecode_cs_binary(dev->debug.decode_ctx, qsubmit.stream_addr,
                           qsubmit.stream_size,
-                          phys_dev->kmod.props.gpu_prod_id);
+                          phys_dev->kmod.props.gpu_id);
    }
 
    return VK_SUCCESS;
@@ -1123,7 +1123,7 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit)
          pandecode_user_msg(decode_ctx, "CS %d on subqueue %d binaries\n\n", i,
                             qsubmit->queue_index);
          pandecode_cs_binary(decode_ctx, qsubmit->stream_addr,
-                             qsubmit->stream_size, props->gpu_prod_id);
+                             qsubmit->stream_size, props->gpu_id);
          pandecode_user_msg(decode_ctx, "\n");
       }
 
@@ -1148,7 +1148,7 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit)
       uint64_t trace = queue->subqueues[i].tracebuf.addr.dev;
 
       pandecode_user_msg(decode_ctx, "\nCS traces on subqueue %d\n\n", i);
-      pandecode_cs_trace(decode_ctx, trace, trace_size, props->gpu_prod_id);
+      pandecode_cs_trace(decode_ctx, trace, trace_size, props->gpu_id);
       pandecode_user_msg(decode_ctx, "\n");
    }
 }
diff --git a/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c b/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c
index 95f0601f2b7..a7756f290f3 100644
--- a/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c
+++ b/src/panfrost/vulkan/jm/panvk_vX_gpu_queue.c
@@ -80,7 +80,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
 
       if (debug & PANVK_DEBUG_TRACE) {
          pandecode_jc(dev->debug.decode_ctx, batch->vtc_jc.first_job,
-                      phys_dev->kmod.props.gpu_prod_id);
+                      phys_dev->kmod.props.gpu_id);
       }
 
       if (debug & PANVK_DEBUG_DUMP)
@@ -88,7 +88,7 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
 
       if (debug & PANVK_DEBUG_SYNC)
         pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
-                                  phys_dev->kmod.props.gpu_prod_id);
+                                  phys_dev->kmod.props.gpu_id);
    }
 
    if (batch->frag_jc.first_job) {
@@ -118,14 +118,14 @@ panvk_queue_submit_batch(struct panvk_gpu_queue *queue, struct panvk_batch *batc
 
       if (debug & PANVK_DEBUG_TRACE)
         pandecode_jc(dev->debug.decode_ctx, batch->frag_jc.first_job,
-                     phys_dev->kmod.props.gpu_prod_id);
+                     phys_dev->kmod.props.gpu_id);
 
       if (debug & PANVK_DEBUG_DUMP)
         pandecode_dump_mappings(dev->debug.decode_ctx);
 
      if (debug & PANVK_DEBUG_SYNC)
        pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
-                                phys_dev->kmod.props.gpu_prod_id);
+                                phys_dev->kmod.props.gpu_id);
    }
 
    if (debug & PANVK_DEBUG_TRACE)
diff --git a/src/panfrost/vulkan/panvk_buffer.c b/src/panfrost/vulkan/panvk_buffer.c
index b38e53fbef1..abbaf18e376 100644
--- a/src/panfrost/vulkan/panvk_buffer.c
+++ b/src/panfrost/vulkan/panvk_buffer.c
@@ -57,7 +57,7 @@ panvk_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount,
    VK_FROM_HANDLE(panvk_device, device, _device);
    const struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(device->vk.physical);
-   const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
+   const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
    VkResult result = VK_SUCCESS;
 
    for (uint32_t i = 0; i < bindInfoCount; i++) {
diff --git a/src/panfrost/vulkan/panvk_image.c b/src/panfrost/vulkan/panvk_image.c
index b7f25a8e6c9..9c6e1df0a5c 100644
--- a/src/panfrost/vulkan/panvk_image.c
+++ b/src/panfrost/vulkan/panvk_image.c
@@ -51,7 +51,7 @@ panvk_image_can_use_mod(struct panvk_image *image, uint64_t mod)
 {
    struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(image->vk.base.device->physical);
-   unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
+   unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
    struct panvk_instance *instance =
       to_panvk_instance(image->vk.base.device->physical->instance);
    enum pipe_format pfmt = vk_format_to_pipe_format(image->vk.format);
@@ -250,7 +250,7 @@ panvk_image_init_layouts(struct panvk_image *image,
 {
    struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(image->vk.base.device->physical);
-   unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
+   unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
    const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_info =
       vk_find_struct_const(
          pCreateInfo->pNext,
diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c
index f56f965f131..0a205a93774 100644
--- a/src/panfrost/vulkan/panvk_physical_device.c
+++ b/src/panfrost/vulkan/panvk_physical_device.c
@@ -184,7 +184,7 @@ static VkResult
 get_device_sync_types(struct panvk_physical_device *device,
                       const struct panvk_instance *instance)
 {
-   const unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
+   const unsigned arch = pan_arch(device->kmod.props.gpu_id);
    uint32_t sync_type_count = 0;
 
    device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd);
@@ -254,15 +254,15 @@ panvk_physical_device_init(struct panvk_physical_device *device,
 
    pan_kmod_dev_query_props(device->kmod.dev, &device->kmod.props);
 
-   device->model = pan_get_model(device->kmod.props.gpu_prod_id,
+   device->model = pan_get_model(device->kmod.props.gpu_id,
                                  device->kmod.props.gpu_variant);
 
-   unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
+   unsigned arch = pan_arch(device->kmod.props.gpu_id);
 
    if (!device->model) {
       result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                             "Unknown gpu_id (%#x) or variant (%#x)",
-                            device->kmod.props.gpu_prod_id,
+                            device->kmod.props.gpu_id,
                             device->kmod.props.gpu_variant);
       goto fail;
    }
@@ -300,7 +300,7 @@ panvk_physical_device_init(struct panvk_physical_device *device,
    memset(device->name, 0, sizeof(device->name));
    sprintf(device->name, "%s", device->model->name);
 
-   if (get_cache_uuid(device->kmod.props.gpu_prod_id, device->cache_uuid)) {
+   if (get_cache_uuid(device->kmod.props.gpu_id, device->cache_uuid)) {
       result = panvk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                             "cannot generate UUID");
       goto fail;
@@ -397,7 +397,7 @@ panvk_GetPhysicalDeviceQueueFamilyProperties2(
    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
                           pQueueFamilyPropertyCount);
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
+   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
 
    vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
    {
@@ -464,7 +464,7 @@ panvk_CreateDevice(VkPhysicalDevice physicalDevice,
                    const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
 {
    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
+   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
    VkResult result = VK_ERROR_INITIALIZATION_FAILED;
 
    panvk_arch_dispatch_ret(arch, create_device, result, physical_device,
@@ -479,7 +479,7 @@ panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    VK_FROM_HANDLE(panvk_device, device, _device);
    struct panvk_physical_device *physical_device =
       to_panvk_physical_device(device->vk.physical);
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
+   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
 
    panvk_arch_dispatch(arch, destroy_device, device, pAllocator);
 }
@@ -535,7 +535,7 @@ get_image_plane_format_features(struct panvk_physical_device *physical_device,
    VkFormatFeatureFlags2 features = 0;
    enum pipe_format pfmt = vk_format_to_pipe_format(format);
    const struct pan_format fmt = physical_device->formats.all[pfmt];
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
+   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
 
    if (!format_is_supported(physical_device, fmt, pfmt))
       return 0;
@@ -592,7 +592,7 @@ get_image_format_features(struct panvk_physical_device *physical_device,
 {
    const struct vk_format_ycbcr_info *ycbcr_info =
       vk_format_get_ycbcr_info(format);
-   const unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
+   const unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
 
    /* TODO: Bifrost YCbCr support */
    if (ycbcr_info && arch <= 7)
@@ -684,7 +684,7 @@ static VkFormatFeatureFlags2
 get_image_format_sample_counts(struct panvk_physical_device *physical_device,
                                VkFormat format)
 {
-   unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
+   unsigned arch = pan_arch(physical_device->kmod.props.gpu_id);
    unsigned max_tib_size = pan_get_max_tib_size(arch, physical_device->model);
    unsigned max_cbuf_atts = pan_get_max_cbufs(arch, max_tib_size);
 
@@ -778,7 +778,7 @@ panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
 static VkExtent3D
 get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
 {
-   const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
+   const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
    const uint64_t max_img_size_B =
       arch <= 10 ? u_uintN_max(32) : u_uintN_max(48);
    const enum pipe_format pfmt = vk_format_to_pipe_format(format);
@@ -801,7 +801,7 @@ get_max_2d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
 static VkExtent3D
 get_max_3d_image_size(struct panvk_physical_device *phys_dev, VkFormat format)
 {
-   const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_prod_id);
+   const unsigned arch = pan_arch(phys_dev->kmod.props.gpu_id);
    const uint64_t max_img_size_B =
       arch <= 10 ? u_uintN_max(32) : u_uintN_max(48);
    enum pipe_format pfmt = vk_format_to_pipe_format(format);
diff --git a/src/panfrost/vulkan/panvk_vX_blend.c b/src/panfrost/vulkan/panvk_vX_blend.c
index 32ce2190e22..92a3467fc8d 100644
--- a/src/panfrost/vulkan/panvk_vX_blend.c
+++ b/src/panfrost/vulkan/panvk_vX_blend.c
@@ -86,7 +86,7 @@ get_blend_shader(struct panvk_device *dev,
 
    /* Compile the NIR shader */
    struct pan_compile_inputs inputs = {
-      .gpu_id = pdev->kmod.props.gpu_prod_id,
+      .gpu_id = pdev->kmod.props.gpu_id,
       .is_blend = true,
       .blend = {
          .nr_samples = key.info.nr_samples,
diff --git a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c
index 57ba32abb1b..a0d865a2b51 100644
--- a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c
@@ -170,7 +170,7 @@ get_preload_shader(struct panvk_device *dev,
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
    struct pan_compile_inputs inputs = {
-      .gpu_id = phys_dev->kmod.props.gpu_prod_id,
+      .gpu_id = phys_dev->kmod.props.gpu_id,
      .is_blit = true,
    };
 
@@ -522,11 +522,11 @@ cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf, struct pan_fb_info *fbinfo,
    */
   struct panvk_physical_device *pdev =
      to_panvk_physical_device(dev->vk.physical);
-   unsigned gpu_id = pdev->kmod.props.gpu_prod_id;
+   unsigned gpu_prod_id = pdev->kmod.props.gpu_id >> 16;
 
   /* the PAN_ARCH check is redundant but allows compiler optimization when
    * PAN_ARCH <= 6 */
-   if (PAN_ARCH > 6 && gpu_id >= 0x7200)
+   if (PAN_ARCH > 6 && gpu_prod_id >= 0x7200)
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS;
   else
diff --git a/src/panfrost/vulkan/panvk_vX_physical_device.c b/src/panfrost/vulkan/panvk_vX_physical_device.c
index a0cd1fa1742..6c2c45a621b 100644
--- a/src/panfrost/vulkan/panvk_vX_physical_device.c
+++ b/src/panfrost/vulkan/panvk_vX_physical_device.c
@@ -549,7 +549,8 @@ panvk_per_arch(get_physical_device_properties)(
       /* Collect arch_major, arch_minor, arch_rev and product_major,
        * as done by the Arm driver.
        */
-      .deviceID = device->kmod.props.gpu_prod_id << 16,
+      .deviceID = device->kmod.props.gpu_id &
+                  (ARCH_MAJOR | ARCH_MINOR | ARCH_REV | PRODUCT_MAJOR),
       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
 
       /* Vulkan 1.0 limits */
@@ -947,7 +948,7 @@ panvk_per_arch(get_physical_device_properties)(
       uint8_t pad[8];
    } dev_uuid = {
       .vendor_id = ARM_VENDOR_ID,
-      .device_id = device->model->gpu_id,
+      .device_id = properties->deviceID,
    };
 
    STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
index 57a9fd1073e..421517b350d 100644
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -368,7 +368,7 @@ panvk_get_nir_options(UNUSED struct vk_physical_device *vk_pdev,
                       UNUSED const struct vk_pipeline_robustness_state *rs)
 {
    struct panvk_physical_device *phys_dev = to_panvk_physical_device(vk_pdev);
-   return pan_shader_get_compiler_options(pan_arch(phys_dev->kmod.props.gpu_prod_id));
+   return pan_shader_get_compiler_options(pan_arch(phys_dev->kmod.props.gpu_id));
 }
 
 static struct spirv_to_nir_options
@@ -1282,7 +1282,7 @@ panvk_compile_shader(struct panvk_device *dev,
    shader->own_bin = true;
 
    struct pan_compile_inputs inputs = {
-      .gpu_id = phys_dev->kmod.props.gpu_prod_id,
+      .gpu_id = phys_dev->kmod.props.gpu_id,
       .view_mask = (state && state->rp) ? state->rp->view_mask : 0,
    };
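
A closing note on the deviceID change above: ARCH_MAJOR | ARCH_MINOR |
ARCH_REV | PRODUCT_MAJOR covers exactly bits 16-31, so masking the merged
gpu_id yields the same value the old "gpu_prod_id << 16" produced; only the
rXpY version bits are dropped. A standalone sketch (revision value
illustrative):

   #include <assert.h>
   #include <stdint.h>

   int
   main(void)
   {
      /* ARCH_MAJOR | ARCH_MINOR | ARCH_REV | PRODUCT_MAJOR == bits 16-31. */
      uint32_t mask = 0xf0000000 | 0x0f000000 | 0x00f00000 | 0x000f0000;
      assert(mask == 0xffff0000);

      /* e.g. Mali-G610 r1p2: old and new deviceID agree. */
      uint32_t prod_id = 0xa867;
      uint32_t gpu_id = (prod_id << 16) | 0x1020;

      assert((gpu_id & mask) == prod_id << 16);
      return 0;
   }
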