diff --git a/src/panfrost/clc/pan_compile.c b/src/panfrost/clc/pan_compile.c index 7fa198267c6..9d8026be231 100644 --- a/src/panfrost/clc/pan_compile.c +++ b/src/panfrost/clc/pan_compile.c @@ -353,10 +353,10 @@ main(int argc, const char **argv) libfunc, MESA_SHADER_COMPUTE, v, get_compiler_options(target_arch), &opt, load_kernel_input); - unsigned gpu_prod_id = (target_arch & 0xf) << 12; + uint64_t target_gpu_id = (target_arch & 0xf) << 28; struct pan_compile_inputs inputs = { - .gpu_id = gpu_prod_id << 16, + .gpu_id = target_gpu_id, .gpu_variant = 0, }; diff --git a/src/panfrost/compiler/bifrost/bi_quirks.h b/src/panfrost/compiler/bifrost/bi_quirks.h index 78aba3f190b..5305db65f59 100644 --- a/src/panfrost/compiler/bifrost/bi_quirks.h +++ b/src/panfrost/compiler/bifrost/bi_quirks.h @@ -6,6 +6,8 @@ #ifndef __BI_QUIRKS_H #define __BI_QUIRKS_H +#include "panfrost/model/pan_model.h" + /* Model-specific quirks requiring compiler workarounds/etc. Quirks * may be errata requiring a workaround, or features. We're trying to be * quirk-positive here; quirky is the best! */ @@ -24,9 +26,11 @@ #define BIFROST_LIMITED_CLPER (1 << 1) static inline unsigned -bifrost_get_quirks(unsigned gpu_id) +bifrost_get_quirks(uint64_t gpu_id) { - switch (gpu_id >> 24) { + unsigned arch_maj_min = + (PAN_ARCH_MAJOR(gpu_id) << 4) | PAN_ARCH_MINOR(gpu_id); + switch (arch_maj_min) { case 0x60: /* G71 */ return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER; case 0x62: /* G72 */ diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index f38e4af223d..5013e167e08 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -4964,7 +4964,7 @@ va_count_stats(bi_context *ctx, unsigned nr_ins, unsigned size, if (model == NULL) { /* Get G57 by default: */ - model = pan_get_model(((uint32_t)0x9001) << 16, 0); + model = pan_get_model(((uint64_t)0x9001) << 16, 0); assert(model); } diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.h b/src/panfrost/compiler/bifrost/bifrost_compile.h index abe2622c8e7..45fc6526383 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost/bifrost_compile.h @@ -62,11 +62,11 @@ bifrost_precompiled_kernel_prepare_push_uniforms( user_data_size); } -void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id); -void bifrost_optimize_nir(nir_shader *nir, unsigned gpu_id); -void bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id); -void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id); -void bifrost_lower_texture_late_nir(nir_shader *nir, unsigned gpu_id); +void bifrost_preprocess_nir(nir_shader *nir, uint64_t gpu_id); +void bifrost_optimize_nir(nir_shader *nir, uint64_t gpu_id); +void bifrost_postprocess_nir(nir_shader *nir, uint64_t gpu_id); +void bifrost_lower_texture_nir(nir_shader *nir, uint64_t gpu_id); +void bifrost_lower_texture_late_nir(nir_shader *nir, uint64_t gpu_id); void bifrost_compile_shader_nir(nir_shader *nir, const struct pan_compile_inputs *inputs, diff --git a/src/panfrost/compiler/bifrost/bifrost_nir.c b/src/panfrost/compiler/bifrost/bifrost_nir.c index d346581530d..87f16b607f1 100644 --- a/src/panfrost/compiler/bifrost/bifrost_nir.c +++ b/src/panfrost/compiler/bifrost/bifrost_nir.c @@ -29,7 +29,7 @@ bi_lower_bit_size(const nir_instr *instr, void *data) switch (instr->type) { case nir_instr_type_alu: { nir_alu_instr *alu = nir_instr_as_alu(instr); - unsigned gpu_id = *((unsigned *)data); + uint64_t 
gpu_id = *((uint64_t *)data); switch (alu->op) { case nir_op_fexp2: @@ -95,7 +95,7 @@ bi_lower_bit_size(const nir_instr *instr, void *data) static uint8_t bi_vectorize_filter(const nir_instr *instr, const void *data) { - unsigned gpu_id = *((unsigned *)data); + uint64_t gpu_id = *((uint64_t *)data); if (instr->type == nir_instr_type_phi) { unsigned bit_size = nir_instr_as_phi(instr)->def.bit_size; @@ -191,7 +191,7 @@ mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size, } static void -bi_optimize_loop_nir(nir_shader *nir, unsigned gpu_id, bool allow_copies) +bi_optimize_loop_nir(nir_shader *nir, uint64_t gpu_id, bool allow_copies) { bool progress; @@ -268,13 +268,14 @@ bi_optimize_loop_nir(nir_shader *nir, unsigned gpu_id, bool allow_copies) } void -bifrost_optimize_nir(nir_shader *nir, unsigned gpu_id) +bifrost_optimize_nir(nir_shader *nir, uint64_t gpu_id) { bi_optimize_loop_nir(nir, gpu_id, true); } static void -bi_optimize_nir(nir_shader *nir, unsigned gpu_id, nir_variable_mode robust_modes) +bi_optimize_nir(nir_shader *nir, uint64_t gpu_id, + nir_variable_mode robust_modes) { NIR_PASS(_, nir, nir_opt_shrink_stores, true); bi_optimize_loop_nir(nir, gpu_id, false); @@ -378,7 +379,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, nir_variable_mode robust_modes } void -bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id) +bifrost_preprocess_nir(nir_shader *nir, uint64_t gpu_id) { MESA_TRACE_FUNC(); @@ -513,7 +514,7 @@ bi_fp32_varying_mask(nir_shader *nir) static bool bi_lower_subgroups(nir_builder *b, nir_intrinsic_instr *intr, void *data) { - unsigned int gpu_id = *(unsigned int *)data; + uint64_t gpu_id = *(uint64_t *)data; unsigned int arch = pan_arch(gpu_id); b->cursor = nir_before_instr(&intr->instr); @@ -670,7 +671,7 @@ mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes, } void -bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id) +bifrost_postprocess_nir(nir_shader *nir, uint64_t gpu_id) { MESA_TRACE_FUNC(); @@ -773,7 +774,8 @@ bifrost_postprocess_nir(nir_shader *nir, unsigned gpu_id) NIR_PASS(_, nir, pan_nir_lower_var_special_pan); } -void bifrost_lower_texture_nir(nir_shader *nir, unsigned gpu_id) +void +bifrost_lower_texture_nir(nir_shader *nir, uint64_t gpu_id) { NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); @@ -919,7 +921,7 @@ pan_nir_lower_buf_image_access(nir_shader *shader, unsigned arch) } void -bifrost_lower_texture_late_nir(nir_shader *nir, unsigned gpu_id) +bifrost_lower_texture_late_nir(nir_shader *nir, uint64_t gpu_id) { NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch, pan_arch(gpu_id)); NIR_PASS(_, nir, pan_nir_lower_buf_image_access, pan_arch(gpu_id)); @@ -1043,7 +1045,7 @@ bifrost_compile_shader_nir(nir_shader *nir, /* pan_nir_resize_varying_io may generate vector conversions which we * need to clean up so the back-end doesn't see them. 
*/ - unsigned gpu_id = inputs->gpu_id; + uint64_t gpu_id = inputs->gpu_id; NIR_PASS(_, nir, nir_lower_alu_width, bi_vectorize_filter, &gpu_id); NIR_PASS(_, nir, nir_lower_load_const_to_scalar); NIR_PASS(_, nir, nir_opt_copy_prop); @@ -1079,7 +1081,7 @@ bifrost_compile_shader_nir(nir_shader *nir, { bool scalar_phis_pass = false; - unsigned gpu_id = inputs->gpu_id; + uint64_t gpu_id = inputs->gpu_id; NIR_PASS(scalar_phis_pass, nir, nir_lower_phis_to_scalar, bi_vectorize_filter, &gpu_id); if (scalar_phis_pass) { diff --git a/src/panfrost/compiler/bifrost/cmdline.c b/src/panfrost/compiler/bifrost/cmdline.c index a50c163d661..b94b7efbefe 100644 --- a/src/panfrost/compiler/bifrost/cmdline.c +++ b/src/panfrost/compiler/bifrost/cmdline.c @@ -15,7 +15,7 @@ #include "util/os_file.h" -unsigned gpu_id = 0x72120000; +uint64_t gpu_id = 0x72120000; int verbose = 0; #define BI_FOURCC(ch0, ch1, ch2, ch3) \ diff --git a/src/panfrost/compiler/midgard/disassemble.c b/src/panfrost/compiler/midgard/disassemble.c index f402cfe4f37..1edb7ee4133 100644 --- a/src/panfrost/compiler/midgard/disassemble.c +++ b/src/panfrost/compiler/midgard/disassemble.c @@ -1806,7 +1806,7 @@ print_texture_word(disassemble_context *ctx, FILE *fp, const uint32_t *word, } void -disassemble_midgard(FILE *fp, const void *code, size_t size, unsigned gpu_id, +disassemble_midgard(FILE *fp, const void *code, size_t size, uint64_t gpu_id, bool verbose) { const uint32_t *words = (const uint32_t *)code; diff --git a/src/panfrost/compiler/midgard/disassemble.h b/src/panfrost/compiler/midgard/disassemble.h index 6e4f989092b..67dafcadf44 100644 --- a/src/panfrost/compiler/midgard/disassemble.h +++ b/src/panfrost/compiler/midgard/disassemble.h @@ -8,5 +8,5 @@ #include #include -void disassemble_midgard(FILE *fp, const void *code, size_t size, unsigned gpu_id, - bool verbose); +void disassemble_midgard(FILE *fp, const void *code, size_t size, + uint64_t gpu_id, bool verbose); diff --git a/src/panfrost/compiler/midgard/midgard_compile.c b/src/panfrost/compiler/midgard/midgard_compile.c index ee011c9b7d0..9cc33d9dd13 100644 --- a/src/panfrost/compiler/midgard/midgard_compile.c +++ b/src/panfrost/compiler/midgard/midgard_compile.c @@ -356,7 +356,7 @@ lower_vec816_alu(const nir_instr *instr, const void *cb_data) } void -midgard_preprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id) +midgard_preprocess_nir(nir_shader *nir, UNUSED uint64_t gpu_id) { /* Ensure that halt are translated to returns and get ride of them */ NIR_PASS(_, nir, nir_lower_halt_to_return); @@ -381,7 +381,7 @@ midgard_preprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id) } void -midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id) +midgard_postprocess_nir(nir_shader *nir, UNUSED uint64_t gpu_id) { midgard_lower_texture_nir(nir, gpu_id); @@ -424,7 +424,8 @@ midgard_postprocess_nir(nir_shader *nir, UNUSED unsigned gpu_id) NIR_PASS(_, nir, nir_lower_var_copies); } -void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id) +void +midgard_lower_texture_nir(nir_shader *nir, uint64_t gpu_id) { NIR_PASS(_, nir, nir_lower_image_atomics_to_global, NULL, NULL); diff --git a/src/panfrost/compiler/midgard/midgard_compile.h b/src/panfrost/compiler/midgard/midgard_compile.h index af439238367..962890797c1 100644 --- a/src/panfrost/compiler/midgard/midgard_compile.h +++ b/src/panfrost/compiler/midgard/midgard_compile.h @@ -11,9 +11,9 @@ #include "panfrost/compiler/pan_compiler.h" #include "util/u_dynarray.h" -void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id); -void 
midgard_postprocess_nir(nir_shader *nir, unsigned gpu_id); -void midgard_lower_texture_nir(nir_shader *nir, unsigned gpu_id); +void midgard_preprocess_nir(nir_shader *nir, uint64_t gpu_id); +void midgard_postprocess_nir(nir_shader *nir, uint64_t gpu_id); +void midgard_lower_texture_nir(nir_shader *nir, uint64_t gpu_id); bool midgard_will_dump_shaders(void); diff --git a/src/panfrost/compiler/midgard/midgard_quirks.h b/src/panfrost/compiler/midgard/midgard_quirks.h index 4c580c16552..9edf6b87c77 100644 --- a/src/panfrost/compiler/midgard/midgard_quirks.h +++ b/src/panfrost/compiler/midgard/midgard_quirks.h @@ -6,6 +6,8 @@ #ifndef __MDG_QUIRKS_H #define __MDG_QUIRKS_H +#include "panfrost/model/pan_model.h" + /* Model-specific quirks requiring compiler workarounds/etc. Quirks * may be errata requiring a workaround, or features. We're trying to be * quirk-positive here; quirky is the best! */ @@ -53,9 +55,9 @@ #define MIDGARD_NO_AUTO32 (1 << 6) static inline unsigned -midgard_get_quirks(unsigned gpu_id) +midgard_get_quirks(uint64_t gpu_id) { - switch (gpu_id >> 16) { + switch (MIDGARD_PRODUCT_ID(gpu_id)) { case 0x600: return MIDGARD_OLD_BLEND | MIDGARD_BROKEN_BLEND_LOADS | MIDGARD_BROKEN_LOD | MIDGARD_NO_UPPER_ALU | MIDGARD_NO_OOO | diff --git a/src/panfrost/compiler/pan_compiler.c b/src/panfrost/compiler/pan_compiler.c index c71a811c319..9b65f3ee9ec 100644 --- a/src/panfrost/compiler/pan_compiler.c +++ b/src/panfrost/compiler/pan_compiler.c @@ -57,7 +57,7 @@ pan_get_nir_shader_compiler_options(unsigned arch) } void -pan_preprocess_nir(nir_shader *nir, unsigned gpu_id) +pan_preprocess_nir(nir_shader *nir, uint64_t gpu_id) { if (pan_arch(gpu_id) >= 6) bifrost_preprocess_nir(nir, gpu_id); @@ -66,14 +66,14 @@ pan_preprocess_nir(nir_shader *nir, unsigned gpu_id) } void -pan_optimize_nir(nir_shader *nir, unsigned gpu_id) +pan_optimize_nir(nir_shader *nir, uint64_t gpu_id) { assert(pan_arch(gpu_id) >= 6); bifrost_optimize_nir(nir, gpu_id); } void -pan_postprocess_nir(nir_shader *nir, unsigned gpu_id) +pan_postprocess_nir(nir_shader *nir, uint64_t gpu_id) { if (pan_arch(gpu_id) >= 6) bifrost_postprocess_nir(nir, gpu_id); @@ -82,7 +82,7 @@ pan_postprocess_nir(nir_shader *nir, unsigned gpu_id) } void -pan_nir_lower_texture_early(nir_shader *nir, unsigned gpu_id) +pan_nir_lower_texture_early(nir_shader *nir, uint64_t gpu_id) { nir_lower_tex_options lower_tex_options = { .lower_txs_lod = true, @@ -99,7 +99,7 @@ pan_nir_lower_texture_early(nir_shader *nir, unsigned gpu_id) } void -pan_nir_lower_texture_late(nir_shader *nir, unsigned gpu_id) +pan_nir_lower_texture_late(nir_shader *nir, uint64_t gpu_id) { /* This must be called after any lowering of resource indices * (panfrost_nir_lower_res_indices / panvk_per_arch(nir_lower_descriptors)) @@ -298,8 +298,8 @@ pan_shader_compile(nir_shader *s, struct pan_compile_inputs *inputs, } void -pan_disassemble(FILE *fp, const void *code, size_t size, - unsigned gpu_id, bool verbose) +pan_disassemble(FILE *fp, const void *code, size_t size, uint64_t gpu_id, + bool verbose) { if (pan_arch(gpu_id) >= 9) disassemble_valhall(fp, (const uint64_t *)code, size, verbose); diff --git a/src/panfrost/compiler/pan_compiler.h b/src/panfrost/compiler/pan_compiler.h index 3022848a82e..57b09691970 100644 --- a/src/panfrost/compiler/pan_compiler.h +++ b/src/panfrost/compiler/pan_compiler.h @@ -23,9 +23,9 @@ bool pan_want_debug_info(unsigned arch); const nir_shader_compiler_options * pan_get_nir_shader_compiler_options(unsigned arch); -void pan_preprocess_nir(nir_shader *nir, unsigned 
gpu_id); -void pan_optimize_nir(nir_shader *nir, unsigned gpu_id); -void pan_postprocess_nir(nir_shader *nir, unsigned gpu_id); +void pan_preprocess_nir(nir_shader *nir, uint64_t gpu_id); +void pan_optimize_nir(nir_shader *nir, uint64_t gpu_id); +void pan_postprocess_nir(nir_shader *nir, uint64_t gpu_id); #define PAN_PRINTF_BUFFER_SIZE 16384 @@ -104,7 +104,7 @@ unsigned pan_lookup_pushed_ubo(struct pan_ubo_push *push, unsigned ubo, unsigned offs); struct pan_compile_inputs { - unsigned gpu_id; + uint64_t gpu_id; uint32_t gpu_variant; bool is_blend, is_blit; bool no_idvs; @@ -272,15 +272,13 @@ pan_varying_layout_require_layout(const struct pan_varying_layout *layout) enum pipe_format pan_varying_format(nir_alu_type type, unsigned ncomps); -void -pan_build_varying_layout_compact(struct pan_varying_layout *layout, - nir_shader *nir, unsigned gpu_id); +void pan_build_varying_layout_compact(struct pan_varying_layout *layout, + nir_shader *nir, uint64_t gpu_id); -void -pan_varying_collect_formats(struct pan_varying_layout *registry, - nir_shader *nir, unsigned gpu_id, - bool trust_varying_flat_highp_types, - bool lower_mediump); +void pan_varying_collect_formats(struct pan_varying_layout *registry, + nir_shader *nir, uint64_t gpu_id, + bool trust_varying_flat_highp_types, + bool lower_mediump); struct pan_shader_varying { gl_varying_slot location; @@ -560,7 +558,7 @@ pan_res_handle(unsigned table, unsigned index) return (table << 24) | index; } -void pan_disassemble(FILE *fp, const void *code, size_t size, - unsigned gpu_id, bool verbose); +void pan_disassemble(FILE *fp, const void *code, size_t size, uint64_t gpu_id, + bool verbose); #endif /* __PAN_COMPILER_H__ */ diff --git a/src/panfrost/compiler/pan_nir.h b/src/panfrost/compiler/pan_nir.h index 0f927c38887..6443d04f8e8 100644 --- a/src/panfrost/compiler/pan_nir.h +++ b/src/panfrost/compiler/pan_nir.h @@ -55,11 +55,11 @@ bool pan_nir_lower_var_special_pan(nir_shader *shader); bool pan_nir_lower_noperspective_vs(nir_shader *shader); bool pan_nir_lower_noperspective_fs(nir_shader *shader); -bool pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id, +bool pan_nir_lower_vs_outputs(nir_shader *shader, uint64_t gpu_id, const struct pan_varying_layout *varying_layout, bool has_idvs, bool *needs_extended_fifo); -bool pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id, +bool pan_nir_lower_fs_inputs(nir_shader *shader, uint64_t gpu_id, const struct pan_varying_layout *varying_layout, struct pan_shader_info *info); @@ -72,8 +72,8 @@ bool pan_nir_lower_image_index(nir_shader *shader, bool pan_nir_lower_texel_buffer_fetch_index(nir_shader *shader, unsigned attrib_offset); -void pan_nir_lower_texture_early(nir_shader *nir, unsigned gpu_id); -void pan_nir_lower_texture_late(nir_shader *nir, unsigned gpu_id); +void pan_nir_lower_texture_early(nir_shader *nir, uint64_t gpu_id); +void pan_nir_lower_texture_late(nir_shader *nir, uint64_t gpu_id); nir_alu_type pan_unpacked_type_for_format(const struct util_format_description *desc); diff --git a/src/panfrost/compiler/pan_nir_collect_varyings.c b/src/panfrost/compiler/pan_nir_collect_varyings.c index c490b83eb98..a9556b073c5 100644 --- a/src/panfrost/compiler/pan_nir_collect_varyings.c +++ b/src/panfrost/compiler/pan_nir_collect_varyings.c @@ -292,7 +292,8 @@ hw_varying_slot(unsigned arch, mesa_shader_stage stage, gl_varying_slot slot) void pan_varying_collect_formats(struct pan_varying_layout *layout, nir_shader *nir, - unsigned gpu_id, bool trust_varying_flat_highp_types, + uint64_t gpu_id, + 
bool trust_varying_flat_highp_types, bool lower_mediump) { assert(nir->info.stage == MESA_SHADER_VERTEX || @@ -362,7 +363,7 @@ pan_varying_collect_formats(struct pan_varying_layout *layout, nir_shader *nir, void pan_build_varying_layout_compact(struct pan_varying_layout *layout, - nir_shader *nir, unsigned gpu_id) + nir_shader *nir, uint64_t gpu_id) { pan_varying_layout_require_format(layout); diff --git a/src/panfrost/compiler/pan_nir_lower_fs_inputs.c b/src/panfrost/compiler/pan_nir_lower_fs_inputs.c index a3ab4e3eefa..2914fb43f18 100644 --- a/src/panfrost/compiler/pan_nir_lower_fs_inputs.c +++ b/src/panfrost/compiler/pan_nir_lower_fs_inputs.c @@ -106,7 +106,7 @@ lower_fs_input_load(struct nir_builder *b, } bool -pan_nir_lower_fs_inputs(nir_shader *shader, unsigned gpu_id, +pan_nir_lower_fs_inputs(nir_shader *shader, uint64_t gpu_id, const struct pan_varying_layout *varying_layout, struct pan_shader_info *info) { diff --git a/src/panfrost/compiler/pan_nir_lower_vs_outputs.c b/src/panfrost/compiler/pan_nir_lower_vs_outputs.c index a669871d099..83ac6077321 100644 --- a/src/panfrost/compiler/pan_nir_lower_vs_outputs.c +++ b/src/panfrost/compiler/pan_nir_lower_vs_outputs.c @@ -258,7 +258,7 @@ gather_vs_outputs(struct nir_builder *b, } bool -pan_nir_lower_vs_outputs(nir_shader *shader, unsigned gpu_id, +pan_nir_lower_vs_outputs(nir_shader *shader, uint64_t gpu_id, const struct pan_varying_layout *varying_layout, bool has_idvs, bool *needs_extended_fifo) { diff --git a/src/panfrost/genxml/decode.c b/src/panfrost/genxml/decode.c index d7879d55ef1..d14979c48b1 100644 --- a/src/panfrost/genxml/decode.c +++ b/src/panfrost/genxml/decode.c @@ -153,7 +153,7 @@ pandecode_rt(struct pandecode_context *ctx, unsigned index, uint64_t gpu_va) } static void -pandecode_rts(struct pandecode_context *ctx, uint64_t gpu_va, unsigned gpu_id, +pandecode_rts(struct pandecode_context *ctx, uint64_t gpu_va, const struct MALI_FRAMEBUFFER_PARAMETERS *fb) { pandecode_log(ctx, "Color Render Targets @%" PRIx64 ":\n", gpu_va); @@ -241,7 +241,7 @@ pandecode_sample_locations(struct pandecode_context *ctx, const void *fb) struct pandecode_fbd GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, - bool is_fragment, unsigned gpu_id) + bool is_fragment, uint64_t gpu_id) { const void *PANDECODE_PTR_VAR(ctx, fb, (uint64_t)gpu_va); pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params); @@ -302,7 +302,7 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, DUMP_UNPACKED(ctx, FRAMEBUFFER_PARAMETERS, params, "Parameters:\n"); #if PAN_ARCH >= 6 if (params.tiler) - GENX(pandecode_tiler)(ctx, params.tiler, gpu_id); + GENX(pandecode_tiler)(ctx, params.tiler); #endif ctx->indent--; @@ -318,7 +318,7 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, } if (is_fragment) - pandecode_rts(ctx, gpu_va, gpu_id, ¶ms); + pandecode_rts(ctx, gpu_va, ¶ms); return (struct pandecode_fbd){ .rt_count = params.render_target_count, @@ -531,8 +531,7 @@ GENX(pandecode_texture)(struct pandecode_context *ctx, #if PAN_ARCH >= 6 void -GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va, - unsigned gpu_id) +GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va) { pan_unpack(PANDECODE_PTR(ctx, gpu_va, struct mali_tiler_context_packed), TILER_CONTEXT, t); @@ -569,7 +568,7 @@ GENX(pandecode_fau)(struct pandecode_context *ctx, uint64_t addr, uint64_t GENX(pandecode_shader)(struct pandecode_context *ctx, uint64_t addr, - const char *label, unsigned gpu_id) + const char *label, uint64_t 
gpu_id) { MAP_ADDR(ctx, SHADER_PROGRAM, addr, cl); pan_unpack(cl, SHADER_PROGRAM, desc); @@ -691,7 +690,7 @@ GENX(pandecode_depth_stencil)(struct pandecode_context *ctx, uint64_t addr) void GENX(pandecode_shader_environment)(struct pandecode_context *ctx, const struct MALI_SHADER_ENVIRONMENT *p, - unsigned gpu_id) + uint64_t gpu_id) { if (p->shader) GENX(pandecode_shader)(ctx, p->shader, "Shader", gpu_id); @@ -709,7 +708,7 @@ GENX(pandecode_shader_environment)(struct pandecode_context *ctx, void GENX(pandecode_blend_descs)(struct pandecode_context *ctx, uint64_t blend, unsigned count, uint64_t frag_shader, - unsigned gpu_id) + uint64_t gpu_id) { for (unsigned i = 0; i < count; ++i) { struct mali_blend_packed *PANDECODE_PTR_VAR(ctx, blend_descs, blend); @@ -726,7 +725,7 @@ GENX(pandecode_blend_descs)(struct pandecode_context *ctx, uint64_t blend, void GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p, - unsigned unused, unsigned gpu_id) + unsigned unused, uint64_t gpu_id) { uint64_t frag_shader = 0; diff --git a/src/panfrost/genxml/decode.h b/src/panfrost/genxml/decode.h index de3a8e13dd9..f7d83ca5525 100644 --- a/src/panfrost/genxml/decode.h +++ b/src/panfrost/genxml/decode.h @@ -90,15 +90,15 @@ void pandecode_validate_buffer(struct pandecode_context *ctx, uint64_t addr, /* Forward declare for all supported gens to permit thunking */ void pandecode_jc_v4(struct pandecode_context *ctx, uint64_t jc_gpu_va, - unsigned gpu_id); + uint64_t gpu_id); void pandecode_jc_v5(struct pandecode_context *ctx, uint64_t jc_gpu_va, - unsigned gpu_id); + uint64_t gpu_id); void pandecode_jc_v6(struct pandecode_context *ctx, uint64_t jc_gpu_va, - unsigned gpu_id); + uint64_t gpu_id); void pandecode_jc_v7(struct pandecode_context *ctx, uint64_t jc_gpu_va, - unsigned gpu_id); + uint64_t gpu_id); void pandecode_jc_v9(struct pandecode_context *ctx, uint64_t jc_gpu_va, - unsigned gpu_id); + uint64_t gpu_id); void pandecode_abort_on_fault_v4(struct pandecode_context *ctx, uint64_t jc_gpu_va); @@ -112,25 +112,25 @@ void pandecode_abort_on_fault_v9(struct pandecode_context *ctx, uint64_t jc_gpu_va); void pandecode_interpret_cs_v10(struct pandecode_context *ctx, uint64_t queue, - uint32_t size, unsigned gpu_id, uint32_t *regs); + uint32_t size, uint64_t gpu_id, uint32_t *regs); void pandecode_cs_binary_v10(struct pandecode_context *ctx, uint64_t bin, - uint32_t bin_size, unsigned gpu_id); + uint32_t bin_size); void pandecode_cs_trace_v10(struct pandecode_context *ctx, uint64_t trace, - uint32_t trace_size, unsigned gpu_id); + uint32_t trace_size, uint64_t gpu_id); void pandecode_interpret_cs_v12(struct pandecode_context *ctx, uint64_t queue, - uint32_t size, unsigned gpu_id, uint32_t *regs); + uint32_t size, uint64_t gpu_id, uint32_t *regs); void pandecode_cs_binary_v12(struct pandecode_context *ctx, uint64_t bin, - uint32_t bin_size, unsigned gpu_id); + uint32_t bin_size); void pandecode_cs_trace_v12(struct pandecode_context *ctx, uint64_t trace, - uint32_t trace_size, unsigned gpu_id); + uint32_t trace_size, uint64_t gpu_id); void pandecode_interpret_cs_v13(struct pandecode_context *ctx, uint64_t queue, - uint32_t size, unsigned gpu_id, uint32_t *regs); + uint32_t size, uint64_t gpu_id, uint32_t *regs); void pandecode_cs_binary_v13(struct pandecode_context *ctx, uint64_t bin, - uint32_t bin_size, unsigned gpu_id); + uint32_t bin_size); void pandecode_cs_trace_v13(struct pandecode_context *ctx, uint64_t trace, - uint32_t trace_size, unsigned gpu_id); + uint32_t trace_size, uint64_t gpu_id); 
/* Logging infrastructure */ static void @@ -206,7 +206,7 @@ pandecode_log_cont(struct pandecode_context *ctx, const char *format, ...) } void pandecode_shader_disassemble(struct pandecode_context *ctx, - uint64_t shader_ptr, unsigned gpu_id); + uint64_t shader_ptr, uint64_t gpu_id); #ifdef PAN_ARCH @@ -218,16 +218,16 @@ struct pandecode_fbd { struct pandecode_fbd GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, bool is_fragment, - unsigned gpu_id); + uint64_t gpu_id); #if PAN_ARCH >= 9 void GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p, unsigned unused, - unsigned gpu_id); + uint64_t gpu_id); #else void GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p, enum mali_job_type job_type, - unsigned gpu_id); + uint64_t gpu_id); #endif #if PAN_ARCH <= 5 @@ -246,15 +246,14 @@ uint64_t GENX(pandecode_blend)(struct pandecode_context *ctx, #endif #if PAN_ARCH >= 6 -void GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va, - unsigned gpu_id); +void GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va); #endif #if PAN_ARCH >= 9 #if PAN_ARCH < 12 void GENX(pandecode_shader_environment)(struct pandecode_context *ctx, const struct MALI_SHADER_ENVIRONMENT *p, - unsigned gpu_id); + uint64_t gpu_id); #endif void GENX(pandecode_resource_tables)(struct pandecode_context *ctx, @@ -264,11 +263,11 @@ void GENX(pandecode_fau)(struct pandecode_context *ctx, uint64_t addr, unsigned count, const char *name); uint64_t GENX(pandecode_shader)(struct pandecode_context *ctx, uint64_t addr, - const char *label, unsigned gpu_id); + const char *label, uint64_t gpu_id); void GENX(pandecode_blend_descs)(struct pandecode_context *ctx, uint64_t blend, unsigned count, uint64_t frag_shader, - unsigned gpu_id); + uint64_t gpu_id); void GENX(pandecode_depth_stencil)(struct pandecode_context *ctx, uint64_t addr); diff --git a/src/panfrost/genxml/decode_common.c b/src/panfrost/genxml/decode_common.c index 27b79b676d9..c0e816eebf2 100644 --- a/src/panfrost/genxml/decode_common.c +++ b/src/panfrost/genxml/decode_common.c @@ -352,7 +352,7 @@ pandecode_dump_mappings(struct pandecode_context *ctx) void pandecode_abort_on_fault(struct pandecode_context *ctx, uint64_t jc_gpu_va, - unsigned gpu_id) + uint64_t gpu_id) { simple_mtx_lock(&ctx->lock); @@ -380,7 +380,7 @@ pandecode_abort_on_fault(struct pandecode_context *ctx, uint64_t jc_gpu_va, } void -pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va, unsigned gpu_id) +pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va, uint64_t gpu_id) { simple_mtx_lock(&ctx->lock); @@ -409,7 +409,7 @@ pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va, unsigned gpu_id) void pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va, - uint32_t size, unsigned gpu_id, uint32_t *regs) + uint32_t size, uint64_t gpu_id, uint32_t *regs) { simple_mtx_lock(&ctx->lock); @@ -432,19 +432,19 @@ pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va, void pandecode_cs_binary(struct pandecode_context *ctx, uint64_t bin_gpu_va, - uint32_t size, unsigned gpu_id) + uint32_t size, uint64_t gpu_id) { simple_mtx_lock(&ctx->lock); switch (pan_arch(gpu_id)) { case 10: - pandecode_cs_binary_v10(ctx, bin_gpu_va, size, gpu_id); + pandecode_cs_binary_v10(ctx, bin_gpu_va, size); break; case 12: - pandecode_cs_binary_v12(ctx, bin_gpu_va, size, gpu_id); + pandecode_cs_binary_v12(ctx, bin_gpu_va, size); break; case 13: - pandecode_cs_binary_v13(ctx, 
bin_gpu_va, size, gpu_id); + pandecode_cs_binary_v13(ctx, bin_gpu_va, size); break; default: UNREACHABLE("Unsupported architecture"); @@ -455,7 +455,7 @@ pandecode_cs_binary(struct pandecode_context *ctx, uint64_t bin_gpu_va, void pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va, - uint32_t size, unsigned gpu_id) + uint32_t size, uint64_t gpu_id) { simple_mtx_lock(&ctx->lock); @@ -478,7 +478,7 @@ pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va, void pandecode_shader_disassemble(struct pandecode_context *ctx, uint64_t shader_ptr, - unsigned gpu_id) + uint64_t gpu_id) { uint8_t *PANDECODE_PTR_VAR(ctx, code, shader_ptr); diff --git a/src/panfrost/genxml/decode_csf.c b/src/panfrost/genxml/decode_csf.c index ab7f96b337a..ca3b4807950 100644 --- a/src/panfrost/genxml/decode_csf.c +++ b/src/panfrost/genxml/decode_csf.c @@ -46,7 +46,7 @@ struct queue_ctx { } call_stack[MAX_CALL_STACK_DEPTH + 1]; /* +1 for exception handler */ uint8_t call_stack_depth; - unsigned gpu_id; + uint64_t gpu_id; }; static void @@ -789,7 +789,7 @@ pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp, if (tiler_flags.index_type) pandecode_log(ctx, "Index array size: %u\n", cs_get_u32(qctx, 39)); - GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id); + GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40)); DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n"); pandecode_log(ctx, "Low depth clamp: %f\n", uif(cs_get_u32(qctx, 44))); @@ -902,7 +902,7 @@ pandecode_run_idvs2(struct pandecode_context *ctx, FILE *fp, pandecode_log(ctx, "Vertex offset: %u\n", vertex_offset); pandecode_log(ctx, "Instance offset: %u\n", instance_offset); - GENX(pandecode_tiler)(ctx, tilder_descriptor_pointer, qctx->gpu_id); + GENX(pandecode_tiler)(ctx, tilder_descriptor_pointer); /* If this is true, then the scissor is actually a pointer to an * array of boxes; bottom 56 bits are the pointer and top 8 are @@ -1062,8 +1062,7 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, pandecode_log(ctx, "Index array size: %u\n", cs_get_u32(qctx, MALI_IDVS_SR_INDEX_BUFFER_SIZE)); - GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, MALI_IDVS_SR_TILER_CTX), - qctx->gpu_id); + GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, MALI_IDVS_SR_TILER_CTX)); DUMP_CL(ctx, SCISSOR, &qctx->regs[MALI_IDVS_SR_SCISSOR_BOX], "Scissor\n"); pandecode_log(ctx, "Low depth clamp: %f\n", @@ -1134,7 +1133,7 @@ pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp, pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags); DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n"); - GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id); + GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40)); DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n"); @@ -1669,7 +1668,7 @@ no_interpret: void GENX(pandecode_interpret_cs)(struct pandecode_context *ctx, uint64_t queue, - uint32_t size, unsigned gpu_id, uint32_t *regs) + uint32_t size, uint64_t gpu_id, uint32_t *regs) { pandecode_dump_file_open(ctx); @@ -2451,7 +2450,7 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin, void GENX(pandecode_cs_binary)(struct pandecode_context *ctx, uint64_t bin, - uint32_t bin_size, unsigned gpu_id) + uint32_t bin_size) { if (!bin_size) return; @@ -2478,7 +2477,7 @@ GENX(pandecode_cs_binary)(struct pandecode_context *ctx, uint64_t bin, void GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace, - uint32_t trace_size, unsigned gpu_id) + uint32_t trace_size, uint64_t gpu_id) { 
pandecode_dump_file_open(ctx); diff --git a/src/panfrost/genxml/decode_jm.c b/src/panfrost/genxml/decode_jm.c index ea0d3b063ed..4fab8b76bd8 100644 --- a/src/panfrost/genxml/decode_jm.c +++ b/src/panfrost/genxml/decode_jm.c @@ -238,7 +238,7 @@ pandecode_uniforms(struct pandecode_context *ctx, uint64_t uniforms, void GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p, - enum mali_job_type job_type, unsigned gpu_id) + enum mali_job_type job_type, uint64_t gpu_id) { #if PAN_ARCH >= 5 struct pandecode_fbd fbd_info = {.rt_count = 1}; @@ -385,7 +385,7 @@ GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p, static void pandecode_vertex_compute_geometry_job(struct pandecode_context *ctx, const struct MALI_JOB_HEADER *h, - uint64_t job, unsigned gpu_id) + uint64_t job, uint64_t gpu_id) { struct mali_compute_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, COMPUTE_JOB, DRAW, draw); @@ -422,7 +422,7 @@ pandecode_cache_flush_job(struct pandecode_context *ctx, uint64_t job) static void pandecode_tiler_job(struct pandecode_context *ctx, const struct MALI_JOB_HEADER *h, uint64_t job, - unsigned gpu_id) + uint64_t gpu_id) { struct mali_tiler_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, TILER_JOB, DRAW, draw); @@ -441,7 +441,7 @@ pandecode_tiler_job(struct pandecode_context *ctx, #if PAN_ARCH >= 6 pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr); - GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id); + GENX(pandecode_tiler)(ctx, tiler_ptr.address); #if PAN_ARCH >= 9 DUMP_SECTION(ctx, TILER_JOB, INSTANCE_COUNT, p, "Instance count:\n"); @@ -459,7 +459,7 @@ pandecode_tiler_job(struct pandecode_context *ctx, static void pandecode_fragment_job(struct pandecode_context *ctx, uint64_t job, - unsigned gpu_id) + uint64_t gpu_id) { struct mali_fragment_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s); @@ -497,7 +497,7 @@ pandecode_fragment_job(struct pandecode_context *ctx, uint64_t job, static void pandecode_indexed_vertex_job(struct pandecode_context *ctx, const struct MALI_JOB_HEADER *h, uint64_t job, - unsigned gpu_id) + uint64_t gpu_id) { struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(ctx, p, job); @@ -518,7 +518,7 @@ pandecode_indexed_vertex_job(struct pandecode_context *ctx, pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr); pandecode_log(ctx, "Tiler Job Payload:\n"); ctx->indent++; - GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id); + GENX(pandecode_tiler)(ctx, tiler_ptr.address); ctx->indent--; pandecode_invocation(ctx, @@ -535,7 +535,7 @@ pandecode_indexed_vertex_job(struct pandecode_context *ctx, #if PAN_ARCH == 9 static void pandecode_malloc_vertex_job(struct pandecode_context *ctx, uint64_t job, - unsigned gpu_id) + uint64_t gpu_id) { struct mali_malloc_vertex_job_packed *PANDECODE_PTR_VAR(ctx, p, job); @@ -553,7 +553,7 @@ pandecode_malloc_vertex_job(struct pandecode_context *ctx, uint64_t job, pandecode_log(ctx, "Tiler Job Payload:\n"); ctx->indent++; if (tiler_ptr.address) - GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id); + GENX(pandecode_tiler)(ctx, tiler_ptr.address); else pandecode_log(ctx, "\n"); ctx->indent--; @@ -568,7 +568,7 @@ pandecode_malloc_vertex_job(struct pandecode_context *ctx, uint64_t job, static void pandecode_compute_job(struct pandecode_context *ctx, uint64_t job, - unsigned gpu_id) + uint64_t gpu_id) { struct mali_compute_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, COMPUTE_JOB, PAYLOAD, 
payload); @@ -584,7 +584,7 @@ pandecode_compute_job(struct pandecode_context *ctx, uint64_t job, */ void GENX(pandecode_jc)(struct pandecode_context *ctx, uint64_t jc_gpu_va, - unsigned gpu_id) + uint64_t gpu_id) { pandecode_dump_file_open(ctx); diff --git a/src/panfrost/lib/kmod/pan_kmod.h b/src/panfrost/lib/kmod/pan_kmod.h index 5abe8eb706b..e7356330e7d 100644 --- a/src/panfrost/lib/kmod/pan_kmod.h +++ b/src/panfrost/lib/kmod/pan_kmod.h @@ -176,7 +176,7 @@ struct pan_kmod_bo { /* List of GPU properties needed by the UMD. */ struct pan_kmod_dev_props { /* GPU ID. */ - uint32_t gpu_id; + uint64_t gpu_id; /* GPU variant. */ uint32_t gpu_variant; diff --git a/src/panfrost/lib/wrap.h b/src/panfrost/lib/wrap.h index 3cf6d15a498..3c14aa64590 100644 --- a/src/panfrost/lib/wrap.h +++ b/src/panfrost/lib/wrap.h @@ -39,19 +39,19 @@ void pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va, unsigned sz); void pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va, - unsigned gpu_id); + uint64_t gpu_id); void pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va, uint32_t size, - unsigned gpu_id, uint32_t *regs); + uint64_t gpu_id, uint32_t *regs); void pandecode_cs_binary(struct pandecode_context *ctx, uint64_t binary_gpu_va, - uint32_t size, unsigned gpu_id); + uint32_t size, uint64_t gpu_id); void pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va, - uint32_t size, unsigned gpu_id); + uint32_t size, uint64_t gpu_id); void pandecode_abort_on_fault(struct pandecode_context *ctx, uint64_t jc_gpu_va, - unsigned gpu_id); + uint64_t gpu_id); #endif /* __MMAP_TRACE_H__ */ diff --git a/src/panfrost/model/pan_model.c b/src/panfrost/model/pan_model.c index 1e173be4381..2ce6157b74e 100644 --- a/src/panfrost/model/pan_model.c +++ b/src/panfrost/model/pan_model.c @@ -124,7 +124,7 @@ const struct pan_model pan_model_list[] = { #undef MODEL_QUIRKS static uint32_t -get_prod_id(uint32_t gpu_id) +get_prod_id(uint64_t gpu_id) { unsigned arch = pan_arch(gpu_id); if (arch < 6) @@ -140,7 +140,7 @@ get_prod_id(uint32_t gpu_id) * supported at this time. 
*/ const struct pan_model * -pan_get_model(uint32_t gpu_id, uint32_t gpu_variant) +pan_get_model(uint64_t gpu_id, uint32_t gpu_variant) { uint32_t gpu_prod_id = get_prod_id(gpu_id); for (unsigned i = 0; i < ARRAY_SIZE(pan_model_list); ++i) { diff --git a/src/panfrost/model/pan_model.h b/src/panfrost/model/pan_model.h index 27b7fe8d21c..9079f9d6f4c 100644 --- a/src/panfrost/model/pan_model.h +++ b/src/panfrost/model/pan_model.h @@ -79,14 +79,14 @@ struct pan_model { } quirks; }; -const struct pan_model *pan_get_model(uint32_t gpu_id, uint32_t gpu_variant); +const struct pan_model *pan_get_model(uint64_t gpu_id, uint32_t gpu_variant); /* Returns the architecture version given a GPU ID, either from a table for * old-style Midgard versions or directly for new-style Bifrost/Valhall * versions */ static inline unsigned -pan_arch(unsigned gpu_id) +pan_arch(uint64_t gpu_id) { switch (MIDGARD_PRODUCT_ID(gpu_id)) { case 0x600: diff --git a/src/panfrost/tools/panfrostdump.c b/src/panfrost/tools/panfrostdump.c index 5e9d8df6963..bb650bc6faf 100644 --- a/src/panfrost/tools/panfrostdump.c +++ b/src/panfrost/tools/panfrostdump.c @@ -45,7 +45,7 @@ struct panfrost_dump_object_header_ho { union { struct pan_reg_hdr_ho { uint64_t jc; - uint32_t gpu_id; + uint64_t gpu_id; uint32_t major; uint32_t minor; uint64_t nbos; @@ -93,7 +93,7 @@ read_header(FILE *fp, struct panfrost_dump_object_header_ho *pdoh) switch (pdoh->type) { case PANFROSTDUMP_BUF_REG: pdoh->reghdr.jc = le64toh(doh_le.reghdr.jc); - pdoh->reghdr.gpu_id = le32toh(doh_le.reghdr.gpu_id); + pdoh->reghdr.gpu_id = le64toh(doh_le.reghdr.gpu_id); pdoh->reghdr.major = le32toh(doh_le.reghdr.major); pdoh->reghdr.minor = le32toh(doh_le.reghdr.minor); pdoh->reghdr.nbos = le64toh(doh_le.reghdr.nbos); @@ -195,7 +195,7 @@ main(int argc, char *argv[]) struct panfrost_dump_object_header_ho doh; bool print_addr = false; bool print_reg = false; - uint32_t gpu_id = 0; + uint64_t gpu_id = 0; uint64_t jc = 0; size_t nbytes; int i, j, k, c; @@ -269,7 +269,7 @@ main(int argc, char *argv[]) return EXIT_FAILURE; } - printf("JC: %" PRIX64 ", GPU_ID: %" PRIX32 "\n", jc, gpu_id); + printf("JC: %" PRIX64 ", GPU_ID: %" PRIX64 "\n", jc, gpu_id); if (print_reg) { puts("GPU registers:"); diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c index 2d348601714..6436f8ab5d1 100644 --- a/src/panfrost/vulkan/panvk_physical_device.c +++ b/src/panfrost/vulkan/panvk_physical_device.c @@ -160,9 +160,10 @@ init_shader_caches(struct panvk_physical_device *device, memcpy(device->cache_uuid, blake3, VK_UUID_SIZE); #ifdef ENABLE_SHADER_CACHE - char renderer[17]; - ASSERTED int len = snprintf(renderer, sizeof(renderer), "panvk_0x%08x", - device->kmod.dev->props.gpu_id); + char renderer[25]; + ASSERTED int len = + snprintf(renderer, sizeof(renderer), "panvk_0x%016" PRIx64, + device->kmod.dev->props.gpu_id); assert(len == sizeof(renderer) - 1); char timestamp[BLAKE3_HEX_LEN]; @@ -401,7 +402,7 @@ panvk_physical_device_init(struct panvk_physical_device *device, if (!device->model) { result = panvk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, - "Unknown gpu_id (%#x) or variant (%#x)", + "Unknown gpu_id (%#" PRIx64 ") or variant (%#x)", device->kmod.dev->props.gpu_id, device->kmod.dev->props.gpu_variant); goto fail; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c index 417ba9567e2..903b6762a5a 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c +++ 
b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c @@ -830,9 +830,10 @@ cmd_preload_zs_attachments(struct panvk_cmd_buffer *cmdbuf, struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct panvk_physical_device *pdev = to_panvk_physical_device(dev->vk.physical); - unsigned gpu_prod_id = pdev->kmod.dev->props.gpu_id >> 16; + unsigned arch_major = PAN_ARCH_MAJOR(pdev->kmod.dev->props.gpu_id); + unsigned arch_minor = PAN_ARCH_MINOR(pdev->kmod.dev->props.gpu_id); - if (gpu_prod_id >= 0x7200) + if (arch_major > 7 || (arch_major == 7 && arch_minor >= 2)) fs->modes[dcd_idx] = MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS; else if (always_load(load, &key)) fs->modes[dcd_idx] = MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
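
The arch/product decoding in this patch moves from raw shifts on a 32-bit gpu_id (gpu_id >> 24, gpu_id >> 16) to the PAN_ARCH_MAJOR()/PAN_ARCH_MINOR()/MIDGARD_PRODUCT_ID() helpers from src/panfrost/model/pan_model.h, whose definitions are not shown in the hunks above. The sketch below is only an illustration of a bit layout consistent with the values the patch relies on, not the actual pan_model.h code: arch major in bits [31:28] and arch minor in bits [27:24] (matching clc/pan_compile.c's (target_arch & 0xf) << 28), and the product ID in bits [31:16] (matching the ((uint64_t)0x9001) << 16 G57 default). The EXAMPLE_* names below are hypothetical stand-ins for those helpers.

/*
 * Illustrative sketch only -- not part of the patch. Assumes the low 32 bits
 * of the widened 64-bit gpu_id keep the legacy layout: arch major in bits
 * [31:28], arch minor in bits [27:24], product ID in bits [31:16]. The real
 * helpers in src/panfrost/model/pan_model.h may be defined differently.
 */
#include <stdint.h>

#define EXAMPLE_ARCH_MAJOR(gpu_id)  ((unsigned)(((gpu_id) >> 28) & 0xf))
#define EXAMPLE_ARCH_MINOR(gpu_id)  ((unsigned)(((gpu_id) >> 24) & 0xf))
#define EXAMPLE_PRODUCT_ID(gpu_id)  ((unsigned)(((gpu_id) >> 16) & 0xffff))

/*
 * With that layout, a Bifrost G72 ID of the form 0x62xxxxxx decodes to arch
 * 6.2, so the quirk switch in bi_quirks.h sees (6 << 4) | 2 == 0x62 -- the
 * same value the old (gpu_id >> 24) expression produced for a 32-bit ID.
 */
static inline unsigned
example_arch_maj_min(uint64_t gpu_id)
{
   return (EXAMPLE_ARCH_MAJOR(gpu_id) << 4) | EXAMPLE_ARCH_MINOR(gpu_id);
}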