pan: Add support for 64 bit gpu_id
While not currently required, it will be for future GPUs. Also cleans up
gpu_id as a parameter to some functions that didn't use it.

Reviewed-by: Aksel Hjerpbakk <aksel.hjerpbakk@arm.com>
Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40610>
parent f181cc5bca
commit 6cdc3cc1d2

29 changed files with 152 additions and 145 deletions
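The new encoding keeps the architecture major version in the top nibble of the
low 32 bits (the tool below now writes `(target_arch & 0xf) << 28` directly
instead of shifting a product ID into place), so a 64-bit gpu_id leaves
everything above bit 31 free for future GPUs. Below is a minimal sketch of the
layout implied by the hunks that follow; the SKETCH_* macros are illustrative
stand-ins for the real PAN_ARCH_MAJOR/MIDGARD_PRODUCT_ID definitions in
panfrost/model/pan_model.h, with bit positions inferred from this diff rather
than quoted from it:

#include <assert.h>
#include <stdint.h>

/* Illustrative stand-ins for the pan_model.h macros used in the hunks
 * below; bit positions are inferred from this diff, not quoted. */
#define SKETCH_ARCH_MAJOR(gpu_id) ((unsigned)(((gpu_id) >> 28) & 0xf))
#define SKETCH_PRODUCT_ID(gpu_id) ((unsigned)(((gpu_id) >> 16) & 0xffff))

int main(void)
{
   /* The G57 default used by va_count_stats below: product ID 0x9001. */
   uint64_t gpu_id = (uint64_t)0x9001 << 16;
   assert(SKETCH_ARCH_MAJOR(gpu_id) == 9);      /* Valhall, arch v9 */
   assert(SKETCH_PRODUCT_ID(gpu_id) == 0x9001);
   return 0;
}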
@@ -353,10 +353,10 @@ main(int argc, const char **argv)
       libfunc, MESA_SHADER_COMPUTE, v, get_compiler_options(target_arch),
       &opt, load_kernel_input);
 
-   unsigned gpu_prod_id = (target_arch & 0xf) << 12;
+   uint64_t target_gpu_id = (target_arch & 0xf) << 28;
 
    struct pan_compile_inputs inputs = {
-      .gpu_id = gpu_prod_id << 16,
+      .gpu_id = target_gpu_id,
       .gpu_variant = 0,
    };
@@ -6,6 +6,8 @@
 #ifndef __BI_QUIRKS_H
 #define __BI_QUIRKS_H
 
+#include "panfrost/model/pan_model.h"
+
 /* Model-specific quirks requiring compiler workarounds/etc. Quirks
  * may be errata requiring a workaround, or features. We're trying to be
  * quirk-positive here; quirky is the best! */

@@ -24,9 +26,11 @@
 #define BIFROST_LIMITED_CLPER (1 << 1)
 
 static inline unsigned
-bifrost_get_quirks(unsigned gpu_id)
+bifrost_get_quirks(uint64_t gpu_id)
 {
-   switch (gpu_id >> 24) {
+   unsigned arch_maj_min =
+      (PAN_ARCH_MAJOR(gpu_id) << 4) | PAN_ARCH_MINOR(gpu_id);
+   switch (arch_maj_min) {
    case 0x60: /* G71 */
       return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER;
    case 0x62: /* G72 */

@@ -4964,7 +4964,7 @@ va_count_stats(bi_context *ctx, unsigned nr_ins, unsigned size,
 
    if (model == NULL) {
       /* Get G57 by default: */
-      model = pan_get_model(((uint32_t)0x9001) << 16, 0);
+      model = pan_get_model(((uint64_t)0x9001) << 16, 0);
      assert(model);
    }
@@ -62,11 +62,11 @@ bifrost_precompiled_kernel_prepare_push_uniforms(
       user_data_size);
 }
 
-void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id);
-void bifrost_optimize_nir(nir_shader *nir, unsigned gpu_id);
-void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id);
-void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
-void bifrost_lower_texture_late_nir(nir_shader *nir, unsigned gpu_id);
+void bifrost_preprocess_nir(nir_shader *nir, uint64_t gpu_id);
+void bifrost_optimize_nir(nir_shader *nir, uint64_t gpu_id);
+void bifrost_postprocess_nir(nir_shader *nir, uint64_t gpu_id);
+void bifrost_lower_texture_nir(nir_shader *nir, uint64_t gpu_id);
+void bifrost_lower_texture_late_nir(nir_shader *nir, uint64_t gpu_id);
 
 void bifrost_compile_shader_nir(nir_shader *nir,
                                 const struct pan_compile_inputs *inputs,

@@ -29,7 +29,7 @@ bi_lower_bit_size(const nir_instr *instr, void *data)
    switch (instr->type) {
    case nir_instr_type_alu: {
       nir_alu_instr *alu = nir_instr_as_alu(instr);
-      unsigned gpu_id = *((unsigned *)data);
+      uint64_t gpu_id = *((uint64_t *)data);
 
       switch (alu->op) {
       case nir_op_fexp2:

@@ -95,7 +95,7 @@ bi_lower_bit_size(const nir_instr *instr, void *data)
 static uint8_t
 bi_vectorize_filter(const nir_instr *instr, const void *data)
 {
-   unsigned gpu_id = *((unsigned *)data);
+   uint64_t gpu_id = *((uint64_t *)data);
 
    if (instr->type == nir_instr_type_phi) {
       unsigned bit_size = nir_instr_as_phi(instr)->def.bit_size;

@@ -191,7 +191,7 @@ mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size,
 }
 
 static void
-bi_optimize_loop_nir(nir_shader *nir, unsigned gpu_id, bool allow_copies)
+bi_optimize_loop_nir(nir_shader *nir, uint64_t gpu_id, bool allow_copies)
 {
    bool progress;

@@ -268,13 +268,14 @@ bi_optimize_loop_nir(nir_shader *nir, unsigned gpu_id, bool allow_copies)
 }
 
 void
-bifrost_optimize_nir(nir_shader *nir, unsigned gpu_id)
+bifrost_optimize_nir(nir_shader *nir, uint64_t gpu_id)
 {
    bi_optimize_loop_nir(nir, gpu_id, true);
 }
 
 static void
-bi_optimize_nir(nir_shader *nir, unsigned gpu_id, nir_variable_mode robust_modes)
+bi_optimize_nir(nir_shader *nir, uint64_t gpu_id,
+                nir_variable_mode robust_modes)
 {
    NIR_PASS(_, nir, nir_opt_shrink_stores, true);
    bi_optimize_loop_nir(nir, gpu_id, false);

@@ -378,7 +379,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, nir_variable_mode robust_modes
 }
 
 void
-bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
+bifrost_preprocess_nir(nir_shader *nir, uint64_t gpu_id)
 {
    MESA_TRACE_FUNC();

@@ -513,7 +514,7 @@ bi_fp32_varying_mask(nir_shader *nir)
 static bool
 bi_lower_subgroups(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
-   unsigned int gpu_id = *(unsigned int *)data;
+   uint64_t gpu_id = *(uint64_t *)data;
    unsigned int arch = pan_arch(gpu_id);
 
    b->cursor = nir_before_instr(&intr->instr);

@@ -670,7 +671,7 @@ mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
 }
 
 void
-bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
+bifrost_postprocess_nir(nir_shader *nir, uint64_t gpu_id)
 {
    MESA_TRACE_FUNC();

@@ -773,7 +774,8 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id)
    NIR_PASS(_, nir, pan_nir_lower_var_special_pan);
 }
 
-void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id)
+void
+bifrost_lower_texture_nir(nir_shader *nir, uint64_t gpu_id)
 {
    NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);

@@ -919,7 +921,7 @@ pan_nir_lower_buf_image_access(nir_shader *shader, unsigned arch)
 }
 
 void
-bifrost_lower_texture_late_nir(nir_shader *nir, unsigned gpu_id)
+bifrost_lower_texture_late_nir(nir_shader *nir, uint64_t gpu_id)
 {
    NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch, pan_arch(gpu_id));
    NIR_PASS(_, nir, pan_nir_lower_buf_image_access, pan_arch(gpu_id));

@@ -1043,7 +1045,7 @@ bifrost_compile_shader_nir(nir_shader *nir,
    /* pan_nir_resize_varying_io may generate vector conversions which we
     * need to clean up so the back-end doesn't see them.
     */
-   unsigned gpu_id = inputs->gpu_id;
+   uint64_t gpu_id = inputs->gpu_id;
    NIR_PASS(_, nir, nir_lower_alu_width, bi_vectorize_filter, &gpu_id);
    NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
    NIR_PASS(_, nir, nir_opt_copy_prop);

@@ -1079,7 +1081,7 @@ bifrost_compile_shader_nir(nir_shader *nir,
 
    {
       bool scalar_phis_pass = false;
-      unsigned gpu_id = inputs->gpu_id;
+      uint64_t gpu_id = inputs->gpu_id;
       NIR_PASS(scalar_phis_pass, nir, nir_lower_phis_to_scalar,
                bi_vectorize_filter, &gpu_id);
       if (scalar_phis_pass) {
@@ -15,7 +15,7 @@
 
 #include "util/os_file.h"
 
-unsigned gpu_id = 0x72120000;
+uint64_t gpu_id = 0x72120000;
 int verbose = 0;
 
 #define BI_FOURCC(ch0, ch1, ch2, ch3) \

@@ -1806,7 +1806,7 @@ print_texture_word(disassemble_context *ctx, FILE *fp, const uint32_t *word,
 }
 
 void
-disassemble_midgard(FILE *fp, const void *code, size_t size, unsigned gpu_id,
+disassemble_midgard(FILE *fp, const void *code, size_t size, uint64_t gpu_id,
                     bool verbose)
 {
    const uint32_t *words = (const uint32_t *)code;

@@ -8,5 +8,5 @@
 #include <stdint.h>
 #include <stdio.h>
 
-void disassemble_midgard(FILE *fp, const void *code, size_t size, unsigned gpu_id,
-                         bool verbose);
+void disassemble_midgard(FILE *fp, const void *code, size_t size,
+                         uint64_t gpu_id, bool verbose);
@@ -356,7 +356,7 @@ lower_vec816_alu(const nir_instr *instr, const void *cb_data)
 }
 
 void
-midgard_preprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
+midgard_preprocess_nir(nir_shader *nir, UNUSED uint64_t gpu_id)
 {
    /* Ensure that halt are translated to returns and get ride of them */
    NIR_PASS(_, nir, nir_lower_halt_to_return);

@@ -381,7 +381,7 @@ midgard_preprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
 }
 
 void
-midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
+midgard_postprocess_nir(nir_shader *nir, UNUSED uint64_t gpu_id)
 {
    midgard_lower_texture_nir(nir, gpu_id);

@@ -424,7 +424,8 @@ midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id)
    NIR_PASS(_, nir, nir_lower_var_copies);
 }
 
-void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id)
+void
+midgard_lower_texture_nir(nir_shader *nir, uint64_t gpu_id)
 {
    NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL);

@@ -11,9 +11,9 @@
 #include "panfrost/compiler/pan_compiler.h"
 #include "util/u_dynarray.h"
 
-void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id);
-void midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id);
-void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id);
+void midgard_preprocess_nir(nir_shader *nir, uint64_t gpu_id);
+void midgard_postprocess_nir(nir_shader *nir, uint64_t gpu_id);
+void midgard_lower_texture_nir(nir_shader *nir, uint64_t gpu_id);
 
 bool midgard_will_dump_shaders(void);

@@ -6,6 +6,8 @@
 #ifndef __MDG_QUIRKS_H
 #define __MDG_QUIRKS_H
 
+#include "panfrost/model/pan_model.h"
+
 /* Model-specific quirks requiring compiler workarounds/etc. Quirks
  * may be errata requiring a workaround, or features. We're trying to be
  * quirk-positive here; quirky is the best! */

@@ -53,9 +55,9 @@
 #define MIDGARD_NO_AUTO32 (1 << 6)
 
 static inline unsigned
-midgard_get_quirks(unsigned gpu_id)
+midgard_get_quirks(uint64_t gpu_id)
 {
-   switch (gpu_id >> 16) {
+   switch (MIDGARD_PRODUCT_ID(gpu_id)) {
    case 0x600:
       return MIDGARD_OLD_BLEND | MIDGARD_BROKEN_BLEND_LOADS |
              MIDGARD_BROKEN_LOD | MIDGARD_NO_UPPER_ALU | MIDGARD_NO_OOO |
@@ -57,7 +57,7 @@ pan_get_nir_shader_compiler_options(unsigned arch)
 }
 
 void
-pan_preprocess_nir(nir_shader *nir, unsigned gpu_id)
+pan_preprocess_nir(nir_shader *nir, uint64_t gpu_id)
 {
    if (pan_arch(gpu_id) >= 6)
       bifrost_preprocess_nir(nir, gpu_id);

@@ -66,14 +66,14 @@ pan_preprocess_nir(nir_shader *nir, unsigned gpu_id)
 }
 
 void
-pan_optimize_nir(nir_shader *nir, unsigned gpu_id)
+pan_optimize_nir(nir_shader *nir, uint64_t gpu_id)
 {
    assert(pan_arch(gpu_id) >= 6);
    bifrost_optimize_nir(nir, gpu_id);
 }
 
 void
-pan_postprocess_nir(nir_shader *nir, unsigned gpu_id)
+pan_postprocess_nir(nir_shader *nir, uint64_t gpu_id)
 {
    if (pan_arch(gpu_id) >= 6)
       bifrost_postprocess_nir(nir, gpu_id);

@@ -82,7 +82,7 @@ pan_postprocess_nir(nir_shader *nir, unsigned gpu_id)
 }
 
 void
-pan_nir_lower_texture_early(nir_shader *nir, unsigned gpu_id)
+pan_nir_lower_texture_early(nir_shader *nir, uint64_t gpu_id)
 {
    nir_lower_tex_options lower_tex_options = {
       .lower_txs_lod = true,

@@ -99,7 +99,7 @@ pan_nir_lower_texture_early(nir_shader *nir, unsigned gpu_id)
 }
 
 void
-pan_nir_lower_texture_late(nir_shader *nir, unsigned gpu_id)
+pan_nir_lower_texture_late(nir_shader *nir, uint64_t gpu_id)
 {
    /* This must be called after any lowering of resource indices
    * (panfrost_nir_lower_res_indices / panvk_per_arch(nir_lower_descriptors))

@@ -298,8 +298,8 @@ pan_shader_compile(nir_shader *s, struct pan_compile_inputs *inputs,
 }
 
 void
-pan_disassemble(FILE *fp, const void *code, size_t size,
-                unsigned gpu_id, bool verbose)
+pan_disassemble(FILE *fp, const void *code, size_t size, uint64_t gpu_id,
+                bool verbose)
 {
    if (pan_arch(gpu_id) >= 9)
       disassemble_valhall(fp, (const uint64_t *)code, size, verbose);

@@ -23,9 +23,9 @@ bool pan_want_debug_info(unsigned arch);
 const nir_shader_compiler_options *
 pan_get_nir_shader_compiler_options(unsigned arch);
 
-void pan_preprocess_nir(nir_shader *nir, unsigned gpu_id);
-void pan_optimize_nir(nir_shader *nir, unsigned gpu_id);
-void pan_postprocess_nir(nir_shader *nir, unsigned gpu_id);
+void pan_preprocess_nir(nir_shader *nir, uint64_t gpu_id);
+void pan_optimize_nir(nir_shader *nir, uint64_t gpu_id);
+void pan_postprocess_nir(nir_shader *nir, uint64_t gpu_id);
 
 #define PAN_PRINTF_BUFFER_SIZE 16384

@@ -104,7 +104,7 @@ unsigned pan_lookup_pushed_ubo(struct pan_ubo_push *push, unsigned ubo,
                                unsigned offs);
 
 struct pan_compile_inputs {
-   unsigned gpu_id;
+   uint64_t gpu_id;
    uint32_t gpu_variant;
    bool is_blend, is_blit;
    bool no_idvs;

@@ -272,15 +272,13 @@ pan_varying_layout_require_layout(const struct pan_varying_layout *layout)
 enum pipe_format
 pan_varying_format(nir_alu_type type, unsigned ncomps);
 
-void
-pan_build_varying_layout_compact(struct pan_varying_layout *layout,
-                                 nir_shader *nir, unsigned gpu_id);
+void pan_build_varying_layout_compact(struct pan_varying_layout *layout,
+                                      nir_shader *nir, uint64_t gpu_id);
 
-void
-pan_varying_collect_formats(struct pan_varying_layout *registry,
-                            nir_shader *nir, unsigned gpu_id,
-                            bool trust_varying_flat_highp_types,
-                            bool lower_mediump);
+void pan_varying_collect_formats(struct pan_varying_layout *registry,
+                                 nir_shader *nir, uint64_t gpu_id,
+                                 bool trust_varying_flat_highp_types,
+                                 bool lower_mediump);
 
 struct pan_shader_varying {
    gl_varying_slot location;

@@ -560,7 +558,7 @@ pan_res_handle(unsigned table, unsigned index)
    return (table << 24) | index;
 }
 
-void pan_disassemble(FILE *fp, const void *code, size_t size,
-                     unsigned gpu_id, bool verbose);
+void pan_disassemble(FILE *fp, const void *code, size_t size, uint64_t gpu_id,
+                     bool verbose);
 
 #endif /* __PAN_COMPILER_H__ */
@@ -55,11 +55,11 @@ bool pan_nir_lower_var_special_pan(nir_shader *shader);
 bool pan_nir_lower_noperspective_vs(nir_shader *shader);
 bool pan_nir_lower_noperspective_fs(nir_shader *shader);
 
-bool pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id,
+bool pan_nir_lower_vs_outputs(nir_shader *shader, uint64_t gpu_id,
                               const struct pan_varying_layout *varying_layout,
                               bool has_idvs, bool *needs_extended_fifo);
 
-bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
+bool pan_nir_lower_fs_inputs(nir_shader *shader, uint64_t gpu_id,
                              const struct pan_varying_layout *varying_layout,
                              struct pan_shader_info *info);

@@ -72,8 +72,8 @@ bool pan_nir_lower_image_index(nir_shader *shader,
 bool pan_nir_lower_texel_buffer_fetch_index(nir_shader *shader,
                                             unsigned attrib_offset);
 
-void pan_nir_lower_texture_early(nir_shader *nir, unsigned gpu_id);
-void pan_nir_lower_texture_late(nir_shader *nir, unsigned gpu_id);
+void pan_nir_lower_texture_early(nir_shader *nir, uint64_t gpu_id);
+void pan_nir_lower_texture_late(nir_shader *nir, uint64_t gpu_id);
 
 nir_alu_type
 pan_unpacked_type_for_format(const struct util_format_description *desc);

@@ -292,7 +292,8 @@ hw_varying_slot(unsigned arch, mesa_shader_stage stage, gl_varying_slot slot)
 
 void
 pan_varying_collect_formats(struct pan_varying_layout *layout, nir_shader *nir,
-                            unsigned gpu_id, bool trust_varying_flat_highp_types,
+                            uint64_t gpu_id,
+                            bool trust_varying_flat_highp_types,
                             bool lower_mediump)
 {
    assert(nir->info.stage == MESA_SHADER_VERTEX ||

@@ -362,7 +363,7 @@ pan_varying_collect_formats(struct pan_varying_layout *layout, nir_shader *nir,
 
 void
 pan_build_varying_layout_compact(struct pan_varying_layout *layout,
-                                 nir_shader *nir, unsigned gpu_id)
+                                 nir_shader *nir, uint64_t gpu_id)
 {
    pan_varying_layout_require_format(layout);

@@ -106,7 +106,7 @@ lower_fs_input_load(struct nir_builder *b,
 }
 
 bool
-pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id,
+pan_nir_lower_fs_inputs(nir_shader *shader, uint64_t gpu_id,
                         const struct pan_varying_layout *varying_layout,
                         struct pan_shader_info *info)
 {

@@ -258,7 +258,7 @@ gather_vs_outputs(struct nir_builder *b,
 }
 
 bool
-pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id,
+pan_nir_lower_vs_outputs(nir_shader *shader, uint64_t gpu_id,
                          const struct pan_varying_layout *varying_layout,
                          bool has_idvs, bool *needs_extended_fifo)
 {
@@ -153,7 +153,7 @@ pandecode_rt(struct pandecode_context *ctx, unsigned index, uint64_t gpu_va)
 }
 
 static void
-pandecode_rts(struct pandecode_context *ctx, uint64_t gpu_va, unsigned gpu_id,
+pandecode_rts(struct pandecode_context *ctx, uint64_t gpu_va,
               const struct MALI_FRAMEBUFFER_PARAMETERS *fb)
 {
    pandecode_log(ctx, "Color Render Targets @%" PRIx64 ":\n", gpu_va);

@@ -241,7 +241,7 @@ pandecode_sample_locations(struct pandecode_context *ctx, const void *fb)
 
 struct pandecode_fbd
 GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
-                    bool is_fragment, unsigned gpu_id)
+                    bool is_fragment, uint64_t gpu_id)
 {
    const void *PANDECODE_PTR_VAR(ctx, fb, (uint64_t)gpu_va);
    pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);

@@ -302,7 +302,7 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
    DUMP_UNPACKED(ctx, FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
 #if PAN_ARCH >= 6
    if (params.tiler)
-      GENX(pandecode_tiler)(ctx, params.tiler, gpu_id);
+      GENX(pandecode_tiler)(ctx, params.tiler);
 #endif
 
    ctx->indent--;

@@ -318,7 +318,7 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
    }
 
    if (is_fragment)
-      pandecode_rts(ctx, gpu_va, gpu_id, &params);
+      pandecode_rts(ctx, gpu_va, &params);
 
    return (struct pandecode_fbd){
       .rt_count = params.render_target_count,

@@ -531,8 +531,7 @@ GENX(pandecode_texture)(struct pandecode_context *ctx,
 
 #if PAN_ARCH >= 6
 void
-GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va,
-                      unsigned gpu_id)
+GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va)
 {
    pan_unpack(PANDECODE_PTR(ctx, gpu_va, struct mali_tiler_context_packed),
               TILER_CONTEXT, t);

@@ -569,7 +568,7 @@ GENX(pandecode_fau)(struct pandecode_context *ctx, uint64_t addr,
 
 uint64_t
 GENX(pandecode_shader)(struct pandecode_context *ctx, uint64_t addr,
-                       const char *label, unsigned gpu_id)
+                       const char *label, uint64_t gpu_id)
 {
    MAP_ADDR(ctx, SHADER_PROGRAM, addr, cl);
    pan_unpack(cl, SHADER_PROGRAM, desc);

@@ -691,7 +690,7 @@ GENX(pandecode_depth_stencil)(struct pandecode_context *ctx, uint64_t addr)
 void
 GENX(pandecode_shader_environment)(struct pandecode_context *ctx,
                                    const struct MALI_SHADER_ENVIRONMENT *p,
-                                   unsigned gpu_id)
+                                   uint64_t gpu_id)
 {
    if (p->shader)
       GENX(pandecode_shader)(ctx, p->shader, "Shader", gpu_id);

@@ -709,7 +708,7 @@ GENX(pandecode_shader_environment)(struct pandecode_context *ctx,
 void
 GENX(pandecode_blend_descs)(struct pandecode_context *ctx, uint64_t blend,
                             unsigned count, uint64_t frag_shader,
-                            unsigned gpu_id)
+                            uint64_t gpu_id)
 {
    for (unsigned i = 0; i < count; ++i) {
       struct mali_blend_packed *PANDECODE_PTR_VAR(ctx, blend_descs, blend);

@@ -726,7 +725,7 @@ GENX(pandecode_blend_descs)(struct pandecode_context *ctx, uint64_t blend,
 
 void
 GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p,
-                    unsigned unused, unsigned gpu_id)
+                    unsigned unused, uint64_t gpu_id)
 {
    uint64_t frag_shader = 0;
@@ -90,15 +90,15 @@ void pandecode_validate_buffer(struct pandecode_context *ctx, uint64_t addr,
 
 /* Forward declare for all supported gens to permit thunking */
 void pandecode_jc_v4(struct pandecode_context *ctx, uint64_t jc_gpu_va,
-                     unsigned gpu_id);
+                     uint64_t gpu_id);
 void pandecode_jc_v5(struct pandecode_context *ctx, uint64_t jc_gpu_va,
-                     unsigned gpu_id);
+                     uint64_t gpu_id);
 void pandecode_jc_v6(struct pandecode_context *ctx, uint64_t jc_gpu_va,
-                     unsigned gpu_id);
+                     uint64_t gpu_id);
 void pandecode_jc_v7(struct pandecode_context *ctx, uint64_t jc_gpu_va,
-                     unsigned gpu_id);
+                     uint64_t gpu_id);
 void pandecode_jc_v9(struct pandecode_context *ctx, uint64_t jc_gpu_va,
-                     unsigned gpu_id);
+                     uint64_t gpu_id);
 
 void pandecode_abort_on_fault_v4(struct pandecode_context *ctx,
                                  uint64_t jc_gpu_va);

@@ -112,25 +112,25 @@ void pandecode_abort_on_fault_v9(struct pandecode_context *ctx,
                                  uint64_t jc_gpu_va);
 
 void pandecode_interpret_cs_v10(struct pandecode_context *ctx, uint64_t queue,
-                                uint32_t size, unsigned gpu_id, uint32_t *regs);
+                                uint32_t size, uint64_t gpu_id, uint32_t *regs);
 void pandecode_cs_binary_v10(struct pandecode_context *ctx, uint64_t bin,
-                             uint32_t bin_size, unsigned gpu_id);
+                             uint32_t bin_size);
 void pandecode_cs_trace_v10(struct pandecode_context *ctx, uint64_t trace,
-                            uint32_t trace_size, unsigned gpu_id);
+                            uint32_t trace_size, uint64_t gpu_id);
 
 void pandecode_interpret_cs_v12(struct pandecode_context *ctx, uint64_t queue,
-                                uint32_t size, unsigned gpu_id, uint32_t *regs);
+                                uint32_t size, uint64_t gpu_id, uint32_t *regs);
 void pandecode_cs_binary_v12(struct pandecode_context *ctx, uint64_t bin,
-                             uint32_t bin_size, unsigned gpu_id);
+                             uint32_t bin_size);
 void pandecode_cs_trace_v12(struct pandecode_context *ctx, uint64_t trace,
-                            uint32_t trace_size, unsigned gpu_id);
+                            uint32_t trace_size, uint64_t gpu_id);
 
 void pandecode_interpret_cs_v13(struct pandecode_context *ctx, uint64_t queue,
-                                uint32_t size, unsigned gpu_id, uint32_t *regs);
+                                uint32_t size, uint64_t gpu_id, uint32_t *regs);
 void pandecode_cs_binary_v13(struct pandecode_context *ctx, uint64_t bin,
-                             uint32_t bin_size, unsigned gpu_id);
+                             uint32_t bin_size);
 void pandecode_cs_trace_v13(struct pandecode_context *ctx, uint64_t trace,
-                            uint32_t trace_size, unsigned gpu_id);
+                            uint32_t trace_size, uint64_t gpu_id);
 
 /* Logging infrastructure */
 static void

@@ -206,7 +206,7 @@ pandecode_log_cont(struct pandecode_context *ctx, const char *format, ...)
 }
 
 void pandecode_shader_disassemble(struct pandecode_context *ctx,
-                                  uint64_t shader_ptr, unsigned gpu_id);
+                                  uint64_t shader_ptr, uint64_t gpu_id);
 
 #ifdef PAN_ARCH

@@ -218,16 +218,16 @@ struct pandecode_fbd {
 
 struct pandecode_fbd GENX(pandecode_fbd)(struct pandecode_context *ctx,
                                          uint64_t gpu_va, bool is_fragment,
-                                         unsigned gpu_id);
+                                         uint64_t gpu_id);
 
 #if PAN_ARCH >= 9
 void GENX(pandecode_dcd)(struct pandecode_context *ctx,
                          const struct MALI_DRAW *p, unsigned unused,
-                         unsigned gpu_id);
+                         uint64_t gpu_id);
 #else
 void GENX(pandecode_dcd)(struct pandecode_context *ctx,
                          const struct MALI_DRAW *p, enum mali_job_type job_type,
-                         unsigned gpu_id);
+                         uint64_t gpu_id);
 #endif
 
 #if PAN_ARCH <= 5

@@ -246,15 +246,14 @@ uint64_t GENX(pandecode_blend)(struct pandecode_context *ctx,
 #endif
 
 #if PAN_ARCH >= 6
-void GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va,
-                           unsigned gpu_id);
+void GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va);
 #endif
 
 #if PAN_ARCH >= 9
 #if PAN_ARCH < 12
 void GENX(pandecode_shader_environment)(struct pandecode_context *ctx,
                                         const struct MALI_SHADER_ENVIRONMENT *p,
-                                        unsigned gpu_id);
+                                        uint64_t gpu_id);
 #endif
 
 void GENX(pandecode_resource_tables)(struct pandecode_context *ctx,

@@ -264,11 +263,11 @@ void GENX(pandecode_fau)(struct pandecode_context *ctx, uint64_t addr,
                          unsigned count, const char *name);
 
 uint64_t GENX(pandecode_shader)(struct pandecode_context *ctx, uint64_t addr,
-                                const char *label, unsigned gpu_id);
+                                const char *label, uint64_t gpu_id);
 
 void GENX(pandecode_blend_descs)(struct pandecode_context *ctx, uint64_t blend,
                                  unsigned count, uint64_t frag_shader,
-                                 unsigned gpu_id);
+                                 uint64_t gpu_id);
 
 void GENX(pandecode_depth_stencil)(struct pandecode_context *ctx,
                                    uint64_t addr);
@@ -352,7 +352,7 @@ pandecode_dump_mappings(struct pandecode_context *ctx)
 
 void
 pandecode_abort_on_fault(struct pandecode_context *ctx, uint64_t jc_gpu_va,
-                         unsigned gpu_id)
+                         uint64_t gpu_id)
 {
    simple_mtx_lock(&ctx->lock);

@@ -380,7 +380,7 @@ pandecode_abort_on_fault(struct pandecode_context *ctx, uint64_t jc_gpu_va,
 }
 
 void
-pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va, unsigned gpu_id)
+pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va, uint64_t gpu_id)
 {
    simple_mtx_lock(&ctx->lock);

@@ -409,7 +409,7 @@ pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va, unsigned gpu_id)
 
 void
 pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va,
-                       uint32_t size, unsigned gpu_id, uint32_t *regs)
+                       uint32_t size, uint64_t gpu_id, uint32_t *regs)
 {
    simple_mtx_lock(&ctx->lock);

@@ -432,19 +432,19 @@ pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va,
 
 void
 pandecode_cs_binary(struct pandecode_context *ctx, uint64_t bin_gpu_va,
-                    uint32_t size, unsigned gpu_id)
+                    uint32_t size, uint64_t gpu_id)
 {
    simple_mtx_lock(&ctx->lock);
 
    switch (pan_arch(gpu_id)) {
    case 10:
-      pandecode_cs_binary_v10(ctx, bin_gpu_va, size, gpu_id);
+      pandecode_cs_binary_v10(ctx, bin_gpu_va, size);
       break;
    case 12:
-      pandecode_cs_binary_v12(ctx, bin_gpu_va, size, gpu_id);
+      pandecode_cs_binary_v12(ctx, bin_gpu_va, size);
       break;
    case 13:
-      pandecode_cs_binary_v13(ctx, bin_gpu_va, size, gpu_id);
+      pandecode_cs_binary_v13(ctx, bin_gpu_va, size);
       break;
    default:
       UNREACHABLE("Unsupported architecture");

@@ -455,7 +455,7 @@ pandecode_cs_binary(struct pandecode_context *ctx, uint64_t bin_gpu_va,
 
 void
 pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va,
-                   uint32_t size, unsigned gpu_id)
+                   uint32_t size, uint64_t gpu_id)
 {
    simple_mtx_lock(&ctx->lock);

@@ -478,7 +478,7 @@ pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va,
 
 void
 pandecode_shader_disassemble(struct pandecode_context *ctx, uint64_t shader_ptr,
-                             unsigned gpu_id)
+                             uint64_t gpu_id)
 {
    uint8_t *PANDECODE_PTR_VAR(ctx, code, shader_ptr);
@@ -46,7 +46,7 @@ struct queue_ctx {
    } call_stack[MAX_CALL_STACK_DEPTH + 1]; /* +1 for exception handler */
    uint8_t call_stack_depth;
 
-   unsigned gpu_id;
+   uint64_t gpu_id;
 };
 
 static void

@@ -789,7 +789,7 @@ pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp,
    if (tiler_flags.index_type)
       pandecode_log(ctx, "Index array size: %u\n", cs_get_u32(qctx, 39));
 
-   GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id);
+   GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40));
 
    DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");
    pandecode_log(ctx, "Low depth clamp: %f\n", uif(cs_get_u32(qctx, 44)));

@@ -902,7 +902,7 @@ pandecode_run_idvs2(struct pandecode_context *ctx, FILE *fp,
    pandecode_log(ctx, "Vertex offset: %u\n", vertex_offset);
    pandecode_log(ctx, "Instance offset: %u\n", instance_offset);
 
-   GENX(pandecode_tiler)(ctx, tilder_descriptor_pointer, qctx->gpu_id);
+   GENX(pandecode_tiler)(ctx, tilder_descriptor_pointer);
 
    /* If this is true, then the scissor is actually a pointer to an
    * array of boxes; bottom 56 bits are the pointer and top 8 are

@@ -1062,8 +1062,7 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
    pandecode_log(ctx, "Index array size: %u\n",
                  cs_get_u32(qctx, MALI_IDVS_SR_INDEX_BUFFER_SIZE));
 
-   GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, MALI_IDVS_SR_TILER_CTX),
-                         qctx->gpu_id);
+   GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, MALI_IDVS_SR_TILER_CTX));
 
    DUMP_CL(ctx, SCISSOR, &qctx->regs[MALI_IDVS_SR_SCISSOR_BOX], "Scissor\n");
    pandecode_log(ctx, "Low depth clamp: %f\n",

@@ -1134,7 +1133,7 @@ pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp,
    pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags);
    DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
 
-   GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id);
+   GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40));
 
    DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");

@@ -1669,7 +1668,7 @@ no_interpret:
 
 void
 GENX(pandecode_interpret_cs)(struct pandecode_context *ctx, uint64_t queue,
-                             uint32_t size, unsigned gpu_id, uint32_t *regs)
+                             uint32_t size, uint64_t gpu_id, uint32_t *regs)
 {
    pandecode_dump_file_open(ctx);

@@ -2451,7 +2450,7 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin,
 
 void
 GENX(pandecode_cs_binary)(struct pandecode_context *ctx, uint64_t bin,
-                          uint32_t bin_size, unsigned gpu_id)
+                          uint32_t bin_size)
 {
    if (!bin_size)
       return;

@@ -2478,7 +2477,7 @@ GENX(pandecode_cs_binary)(struct pandecode_context *ctx, uint64_t bin,
 
 void
 GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
-                         uint32_t trace_size, unsigned gpu_id)
+                         uint32_t trace_size, uint64_t gpu_id)
 {
    pandecode_dump_file_open(ctx);
@@ -238,7 +238,7 @@ pandecode_uniforms(struct pandecode_context *ctx, uint64_t uniforms,
 
 void
 GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p,
-                    enum mali_job_type job_type, unsigned gpu_id)
+                    enum mali_job_type job_type, uint64_t gpu_id)
 {
 #if PAN_ARCH >= 5
    struct pandecode_fbd fbd_info = {.rt_count = 1};

@@ -385,7 +385,7 @@ GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p,
 static void
 pandecode_vertex_compute_geometry_job(struct pandecode_context *ctx,
                                       const struct MALI_JOB_HEADER *h,
-                                      uint64_t job, unsigned gpu_id)
+                                      uint64_t job, uint64_t gpu_id)
 {
    struct mali_compute_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
    pan_section_unpack(p, COMPUTE_JOB, DRAW, draw);

@@ -422,7 +422,7 @@ pandecode_cache_flush_job(struct pandecode_context *ctx, uint64_t job)
 static void
 pandecode_tiler_job(struct pandecode_context *ctx,
                     const struct MALI_JOB_HEADER *h, uint64_t job,
-                    unsigned gpu_id)
+                    uint64_t gpu_id)
 {
    struct mali_tiler_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
    pan_section_unpack(p, TILER_JOB, DRAW, draw);

@@ -441,7 +441,7 @@ pandecode_tiler_job(struct pandecode_context *ctx,
 
 #if PAN_ARCH >= 6
    pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr);
-   GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id);
+   GENX(pandecode_tiler)(ctx, tiler_ptr.address);
 
 #if PAN_ARCH >= 9
    DUMP_SECTION(ctx, TILER_JOB, INSTANCE_COUNT, p, "Instance count:\n");

@@ -459,7 +459,7 @@ pandecode_tiler_job(struct pandecode_context *ctx,
 
 static void
 pandecode_fragment_job(struct pandecode_context *ctx, uint64_t job,
-                       unsigned gpu_id)
+                       uint64_t gpu_id)
 {
    struct mali_fragment_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
    pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s);

@@ -497,7 +497,7 @@ pandecode_fragment_job(struct pandecode_context *ctx, uint64_t job,
 static void
 pandecode_indexed_vertex_job(struct pandecode_context *ctx,
                              const struct MALI_JOB_HEADER *h, uint64_t job,
-                             unsigned gpu_id)
+                             uint64_t gpu_id)
 {
    struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(ctx, p, job);

@@ -518,7 +518,7 @@ pandecode_indexed_vertex_job(struct pandecode_context *ctx,
    pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr);
    pandecode_log(ctx, "Tiler Job Payload:\n");
    ctx->indent++;
-   GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id);
+   GENX(pandecode_tiler)(ctx, tiler_ptr.address);
    ctx->indent--;
 
    pandecode_invocation(ctx,

@@ -535,7 +535,7 @@ pandecode_indexed_vertex_job(struct pandecode_context *ctx,
 #if PAN_ARCH == 9
 static void
 pandecode_malloc_vertex_job(struct pandecode_context *ctx, uint64_t job,
-                            unsigned gpu_id)
+                            uint64_t gpu_id)
 {
    struct mali_malloc_vertex_job_packed *PANDECODE_PTR_VAR(ctx, p, job);

@@ -553,7 +553,7 @@ pandecode_malloc_vertex_job(struct pandecode_context *ctx, uint64_t job,
    pandecode_log(ctx, "Tiler Job Payload:\n");
    ctx->indent++;
    if (tiler_ptr.address)
-      GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id);
+      GENX(pandecode_tiler)(ctx, tiler_ptr.address);
    else
       pandecode_log(ctx, "<omitted>\n");
    ctx->indent--;

@@ -568,7 +568,7 @@ pandecode_malloc_vertex_job(struct pandecode_context *ctx, uint64_t job,
 
 static void
 pandecode_compute_job(struct pandecode_context *ctx, uint64_t job,
-                      unsigned gpu_id)
+                      uint64_t gpu_id)
 {
    struct mali_compute_job_packed *PANDECODE_PTR_VAR(ctx, p, job);
    pan_section_unpack(p, COMPUTE_JOB, PAYLOAD, payload);

@@ -584,7 +584,7 @@ pandecode_compute_job(struct pandecode_context *ctx, uint64_t job,
  */
 void
 GENX(pandecode_jc)(struct pandecode_context *ctx, uint64_t jc_gpu_va,
-                   unsigned gpu_id)
+                   uint64_t gpu_id)
 {
    pandecode_dump_file_open(ctx);
@@ -176,7 +176,7 @@ struct pan_kmod_bo {
 /* List of GPU properties needed by the UMD. */
 struct pan_kmod_dev_props {
    /* GPU ID. */
-   uint32_t gpu_id;
+   uint64_t gpu_id;
 
    /* GPU variant. */
    uint32_t gpu_variant;

@@ -39,19 +39,19 @@ void pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va,
                            unsigned sz);
 
 void pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va,
-                  unsigned gpu_id);
+                  uint64_t gpu_id);
 
 void pandecode_interpret_cs(struct pandecode_context *ctx,
                             uint64_t queue_gpu_va, uint32_t size,
-                            unsigned gpu_id, uint32_t *regs);
+                            uint64_t gpu_id, uint32_t *regs);
 
 void pandecode_cs_binary(struct pandecode_context *ctx, uint64_t binary_gpu_va,
-                         uint32_t size, unsigned gpu_id);
+                         uint32_t size, uint64_t gpu_id);
 
 void pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va,
-                        uint32_t size, unsigned gpu_id);
+                        uint32_t size, uint64_t gpu_id);
 
 void pandecode_abort_on_fault(struct pandecode_context *ctx, uint64_t jc_gpu_va,
-                              unsigned gpu_id);
+                              uint64_t gpu_id);
 
 #endif /* __MMAP_TRACE_H__ */
@@ -124,7 +124,7 @@ const struct pan_model pan_model_list[] = {
 #undef MODEL_QUIRKS
 
 static uint32_t
-get_prod_id(uint32_t gpu_id)
+get_prod_id(uint64_t gpu_id)
 {
    unsigned arch = pan_arch(gpu_id);
    if (arch < 6)

@@ -140,7 +140,7 @@ get_prod_id(uint32_t gpu_id)
  * supported at this time.
  */
 const struct pan_model *
-pan_get_model(uint32_t gpu_id, uint32_t gpu_variant)
+pan_get_model(uint64_t gpu_id, uint32_t gpu_variant)
 {
    uint32_t gpu_prod_id = get_prod_id(gpu_id);
    for (unsigned i = 0; i < ARRAY_SIZE(pan_model_list); ++i) {

@@ -79,14 +79,14 @@ struct pan_model {
    } quirks;
 };
 
-const struct pan_model *pan_get_model(uint32_t gpu_id, uint32_t gpu_variant);
+const struct pan_model *pan_get_model(uint64_t gpu_id, uint32_t gpu_variant);
 
 /* Returns the architecture version given a GPU ID, either from a table for
  * old-style Midgard versions or directly for new-style Bifrost/Valhall
  * versions */
 
 static inline unsigned
-pan_arch(unsigned gpu_id)
+pan_arch(uint64_t gpu_id)
 {
    switch (MIDGARD_PRODUCT_ID(gpu_id)) {
    case 0x600:
@@ -45,7 +45,7 @@ struct panfrost_dump_object_header_ho {
    union {
       struct pan_reg_hdr_ho {
         uint64_t jc;
-        uint32_t gpu_id;
+        uint64_t gpu_id;
         uint32_t major;
         uint32_t minor;
         uint64_t nbos;

@@ -93,7 +93,7 @@ read_header(FILE *fp, struct panfrost_dump_object_header_ho *pdoh)
    switch (pdoh->type) {
    case PANFROSTDUMP_BUF_REG:
       pdoh->reghdr.jc = le64toh(doh_le.reghdr.jc);
-      pdoh->reghdr.gpu_id = le32toh(doh_le.reghdr.gpu_id);
+      pdoh->reghdr.gpu_id = le64toh(doh_le.reghdr.gpu_id);
       pdoh->reghdr.major = le32toh(doh_le.reghdr.major);
       pdoh->reghdr.minor = le32toh(doh_le.reghdr.minor);
       pdoh->reghdr.nbos = le64toh(doh_le.reghdr.nbos);

@@ -195,7 +195,7 @@ main(int argc, char *argv[])
    struct panfrost_dump_object_header_ho doh;
    bool print_addr = false;
    bool print_reg = false;
-   uint32_t gpu_id = 0;
+   uint64_t gpu_id = 0;
    uint64_t jc = 0;
    size_t nbytes;
    int i, j, k, c;

@@ -269,7 +269,7 @@ main(int argc, char *argv[])
       return EXIT_FAILURE;
    }
 
-   printf("JC: %" PRIX64 ", GPU_ID: %" PRIX32 "\n", jc, gpu_id);
+   printf("JC: %" PRIX64 ", GPU_ID: %" PRIX64 "\n", jc, gpu_id);
 
    if (print_reg) {
       puts("GPU registers:");
@@ -160,9 +160,10 @@ init_shader_caches(struct panvk_physical_device *device,
    memcpy(device->cache_uuid, blake3, VK_UUID_SIZE);
 
 #ifdef ENABLE_SHADER_CACHE
-   char renderer[17];
-   ASSERTED int len = snprintf(renderer, sizeof(renderer), "panvk_0x%08x",
-                               device->kmod.dev->props.gpu_id);
+   char renderer[25];
+   ASSERTED int len =
+      snprintf(renderer, sizeof(renderer), "panvk_0x%016" PRIx64,
+               device->kmod.dev->props.gpu_id);
    assert(len == sizeof(renderer) - 1);
 
    char timestamp[BLAKE3_HEX_LEN];

@@ -401,7 +402,7 @@ panvk_physical_device_init(struct panvk_physical_device *device,
 
    if (!device->model) {
       result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
-                            "Unknown gpu_id (%#x) or variant (%#x)",
+                            "Unknown gpu_id (%#" PRIx64 ") or variant (%#x)",
                             device->kmod.dev->props.gpu_id,
                             device->kmod.dev->props.gpu_variant);
       goto fail;

@@ -830,9 +830,10 @@ cmd_preload_zs_attachments(struct panvk_cmd_buffer *cmdbuf,
    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
    struct panvk_physical_device *pdev =
       to_panvk_physical_device(dev->vk.physical);
-   unsigned gpu_prod_id = pdev->kmod.dev->props.gpu_id >> 16;
+   unsigned arch_major = PAN_ARCH_MAJOR(pdev->kmod.dev->props.gpu_id);
+   unsigned arch_minor = PAN_ARCH_MINOR(pdev->kmod.dev->props.gpu_id);
 
-   if (gpu_prod_id >= 0x7200)
+   if (arch_major > 7 || (arch_major == 7 && arch_minor >= 2))
       fs->modes[dcd_idx] = MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS;
    else if (always_load(load, &key))
       fs->modes[dcd_idx] = MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;