diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 15680c000bc..efd9d22e338 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -4458,12 +4458,13 @@ prepare_shader(struct panfrost_compiled_shader *state,
#endif
#if PAN_ARCH >= 15
cfg.register_count = state->info.work_reg_count;
+ cfg.preload.r0_r15 = state->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(state->info.work_reg_count);
+ cfg.preload.r48_r63 = (state->info.preload >> 48);
#endif
cfg.binary = state->bin.gpu;
- cfg.preload.r48_r63 = (state->info.preload >> 48);
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
@@ -4481,12 +4482,13 @@ prepare_shader(struct panfrost_compiled_shader *state,
#endif
#if PAN_ARCH >= 15
cfg.register_count = state->info.work_reg_count;
+ cfg.preload.r0_r15 = state->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(state->info.work_reg_count);
+ cfg.preload.r48_r63 = (state->info.preload >> 48);
#endif
cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset;
- cfg.preload.r48_r63 = (state->info.preload >> 48);
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
}
diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c
index be4c2c9965e..6f1bba8ab66 100644
--- a/src/gallium/drivers/panfrost/pan_fb_preload.c
+++ b/src/gallium/drivers/panfrost/pan_fb_preload.c
@@ -1107,11 +1107,12 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
#if PAN_ARCH >= 15
cfg.register_count = preload_shader->info.work_reg_count;
+ cfg.preload.r0_r15 = preload_shader->info.preload;
#else
cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
+ cfg.preload.r48_r63 = preload_shader->info.preload >> 48;
#endif
cfg.binary = preload_shader->address;
- cfg.preload.r48_r63 = preload_shader->info.preload >> 48;
}
unsigned bd_count = views.rt_count;
diff --git a/src/gallium/drivers/panfrost/pan_precomp.c b/src/gallium/drivers/panfrost/pan_precomp.c
index 9ca77b32dd7..bc4640d7e2a 100644
--- a/src/gallium/drivers/panfrost/pan_precomp.c
+++ b/src/gallium/drivers/panfrost/pan_precomp.c
@@ -100,12 +100,13 @@ panfrost_precomp_shader_create(
cfg.stage = pan_shader_stage(&res->info);
#if PAN_ARCH >= 15
cfg.register_count = res->info.work_reg_count;
+ cfg.preload.r0_r15 = res->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(res->info.work_reg_count);
+ cfg.preload.r48_r63 = (res->info.preload >> 48);
#endif
cfg.binary = res->code_ptr;
- cfg.preload.r48_r63 = (res->info.preload >> 48);
cfg.flush_to_zero_mode = panfrost_ftz_mode(&res->info);
}
diff --git a/src/panfrost/compiler/bifrost/bi_ra.c b/src/panfrost/compiler/bifrost/bi_ra.c
index f8579f8c983..7f058bf0d3f 100644
--- a/src/panfrost/compiler/bifrost/bi_ra.c
+++ b/src/panfrost/compiler/bifrost/bi_ra.c
@@ -294,7 +294,8 @@ bi_compute_liveness_ra(bi_context *ctx)
#define EVEN_BITS_MASK (0x5555555555555555ull)
static uint64_t
-bi_make_affinity(uint64_t clobber, unsigned count, bool split_file)
+bi_make_affinity(uint64_t clobber, unsigned count, bool split_file,
+ unsigned arch)
{
uint64_t clobbered = 0;
@@ -308,12 +309,12 @@ bi_make_affinity(uint64_t clobber, unsigned count, bool split_file)
clobbered |= mask << (64 - excess);
if (split_file)
- clobbered |= mask << (16 - excess);
+ clobbered |= mask << (((arch >= 15) ? 32 : 16) - excess);
}
/* Don't allocate the middle if we split out the middle */
if (split_file)
- clobbered |= BITFIELD64_MASK(32) << 16;
+ clobbered |= BITFIELD64_MASK(32) << ((arch >= 15) ? 32 : 16);
/* We can use a register iff it's not clobberred */
return ~clobbered;
@@ -341,7 +342,7 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
unsigned count = bi_count_write_registers(ins, d);
unsigned offset = ins->dest[d].offset;
uint64_t affinity =
- bi_make_affinity(preload_live, count, split_file) >> offset;
+ bi_make_affinity(preload_live, count, split_file, arch) >> offset;
/* Valhall needs >= 64-bit staging writes to be pair-aligned */
if (aligned_sr && (count >= 2 || offset))
affinity &= EVEN_BITS_MASK;
@@ -435,7 +436,8 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)
uint64_t default_affinity =
ctx->inputs->is_blend ? BITFIELD64_MASK(16)
: full_regs ? BITFIELD64_MASK(64)
- : (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48));
+ : (ctx->arch >= 15) ? BITFIELD64_MASK(32)
+ : (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48));
/* To test spilling, mimic a small register file */
if (bifrost_debug & BIFROST_DBG_SPILL && !ctx->inputs->is_blend && (bifrost_debug & BIFROST_DBG_NOSSARA))
diff --git a/src/panfrost/compiler/bifrost/compiler.h b/src/panfrost/compiler/bifrost/compiler.h
index 39b4f8d09a0..20b553f43da 100644
--- a/src/panfrost/compiler/bifrost/compiler.h
+++ b/src/panfrost/compiler/bifrost/compiler.h
@@ -1162,25 +1162,25 @@ bi_preload_reg(enum bi_preload val, unsigned arch)
/* Compute */
case BI_PRELOAD_LOCAL_ID_0:
/* Bits [15;0] */
- return 55;
+ return (arch >= 15) ? 4 : 55;
case BI_PRELOAD_LOCAL_ID_1:
/* Bits [31;16] */
- return 55;
+ return (arch >= 15) ? 4 : 55;
case BI_PRELOAD_LOCAL_ID_2:
/* Bits [15;0] */
- return 56;
+ return (arch >= 15) ? 3 : 56;
case BI_PRELOAD_WORKGROUP_ID_0:
- return 57;
+ return (arch >= 15) ? 5 : 57;
case BI_PRELOAD_WORKGROUP_ID_1:
- return 58;
+ return (arch >= 15) ? 6 : 58;
case BI_PRELOAD_WORKGROUP_ID_2:
- return 59;
+ return (arch >= 15) ? 7 : 59;
case BI_PRELOAD_GLOBAL_ID_0:
- return 60;
+ return (arch >= 15) ? 0 : 60;
case BI_PRELOAD_GLOBAL_ID_1:
- return 61;
+ return (arch >= 15) ? 1 : 61;
case BI_PRELOAD_GLOBAL_ID_2:
- return 62;
+ return (arch >= 15) ? 2 : 62;
/* Vertex */
case BI_PRELOAD_POS_RESULT_PTR_LO:
assert(arch < 9);
@@ -1190,58 +1190,58 @@ bi_preload_reg(enum bi_preload val, unsigned arch)
return 59;
case BI_PRELOAD_INTERNAL_ID:
assert(arch >= 9);
- return 59;
+ return (arch >= 15) ? 2 : 59;
case BI_PRELOAD_VERTEX_ID:
- return (arch >= 9) ? 60 : 61;
+ return (arch >= 15) ? 0 : (arch >= 9) ? 60 : 61;
case BI_PRELOAD_INSTANCE_ID:
- return (arch >= 9) ? 61 : 62;
+ return (arch >= 15) ? 1 : (arch >= 9) ? 61 : 62;
case BI_PRELOAD_DRAW_ID:
assert(arch >= 9);
- return 62;
+ return (arch >= 15) ? 3 : 62;
case BI_PRELOAD_VIEW_ID:
assert(arch >= 9);
- return 63;
+ return (arch >= 15) ? 4 : 63;
/* Fragment */
case BI_PRELOAD_PRIMITIVE_ID:
- return 57;
+ return (arch >= 15) ? 6 : 57;
case BI_PRELOAD_PRIMITIVE_FLAGS:
- return 58;
+ return (arch >= 15) ? 3 : 58;
case BI_PRELOAD_POSITION_XY:
- return 59;
+ return (arch >= 15) ? 2 : 59;
case BI_PRELOAD_CUMULATIVE_COVERAGE:
/* Bits [15;0] */
- return 60;
+ return (arch >= 15) ? 0 : 60;
case BI_PRELOAD_RASTERIZER_COVERAGE:
/* Bits [15;0] */
- return 61;
+ return (arch >= 15) ? 1 : 61;
case BI_PRELOAD_SAMPLE_ID:
/* Bits [23;16] */
- return 61;
+ return (arch >= 15) ? 0 : 61;
case BI_PRELOAD_CENTROID_ID:
/* Bits [31;24] */
- return 61;
+ return (arch >= 15) ? 0 : 61;
case BI_PRELOAD_FRAME_ARG:
/* Double reg */
- return 62;
+ return (arch >= 15) ? 4 : 62;
/* Blend */
case BI_PRELOAD_BLEND_SRC0_C0:
- return 0;
+ return (arch >= 15) ? 8 : 0;
case BI_PRELOAD_BLEND_SRC0_C1:
- return 1;
+ return (arch >= 15) ? 9 : 1;
case BI_PRELOAD_BLEND_SRC0_C2:
- return 2;
+ return (arch >= 15) ? 10 : 2;
case BI_PRELOAD_BLEND_SRC0_C3:
- return 3;
+ return (arch >= 15) ? 11 : 3;
case BI_PRELOAD_BLEND_SRC1_C0:
- return 4;
+ return (arch >= 15) ? 12 : 4;
case BI_PRELOAD_BLEND_SRC1_C1:
- return 5;
+ return (arch >= 15) ? 13 : 5;
case BI_PRELOAD_BLEND_SRC1_C2:
- return 6;
+ return (arch >= 15) ? 14 : 6;
case BI_PRELOAD_BLEND_SRC1_C3:
- return 7;
+ return (arch >= 15) ? 15 : 7;
case BI_PRELOAD_BLEND_LINK:
- return 48;
+ return (arch >= 15) ? 7 : 48;
}
UNREACHABLE("Non-handled BI_PRELOAD");
}
diff --git a/src/panfrost/genxml/v15.xml b/src/panfrost/genxml/v15.xml
index 983834f16e4..f1a2bc7e2c9 100644
--- a/src/panfrost/genxml/v15.xml
+++ b/src/panfrost/genxml/v15.xml
@@ -2016,16 +2016,16 @@
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
diff --git a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c
index afa2692bde0..d687fa1d6cf 100644
--- a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c
@@ -241,12 +241,13 @@ get_frame_shader(struct panvk_device *dev,
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
#if PAN_ARCH >= 15
cfg.register_count = shader->info.work_reg_count;
+ cfg.preload.r0_r15 = shader->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
+ cfg.preload.r48_r63 = shader->info.preload >> 48;
#endif
cfg.binary = panvk_priv_mem_dev_addr(shader->code_mem);
- cfg.preload.r48_r63 = shader->info.preload >> 48;
}
#endif
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
index 927a72cd7c9..639288e6e01 100644
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -1182,12 +1182,13 @@ panvk_shader_upload(struct panvk_device *dev,
#if PAN_ARCH >= 15
cfg.register_count = shader->info.work_reg_count;
+ cfg.preload.r0_r15 = shader->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
+ cfg.preload.r48_r63 = (shader->info.preload >> 48);
#endif
cfg.binary = panvk_shader_variant_get_dev_addr(shader);
- cfg.preload.r48_r63 = (shader->info.preload >> 48);
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
@@ -1205,12 +1206,13 @@ panvk_shader_upload(struct panvk_device *dev,
cfg.stage = pan_shader_stage(&shader->info);
#if PAN_ARCH >= 15
cfg.register_count = shader->info.work_reg_count;
+ cfg.preload.r0_r15 = shader->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
+ cfg.preload.r48_r63 = (shader->info.preload >> 48);
#endif
cfg.binary = panvk_shader_variant_get_dev_addr(shader);
- cfg.preload.r48_r63 = (shader->info.preload >> 48);
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
}
@@ -1224,13 +1226,14 @@ panvk_shader_upload(struct panvk_device *dev,
cfg.stage = pan_shader_stage(&shader->info);
#if PAN_ARCH >= 15
cfg.register_count = shader->info.work_reg_count;
+ cfg.preload.r0_r15 = shader->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
+ cfg.preload.r48_r63 = (shader->info.preload >> 48);
#endif
cfg.binary = panvk_shader_variant_get_dev_addr(shader) +
shader->info.vs.no_psiz_offset;
- cfg.preload.r48_r63 = (shader->info.preload >> 48);
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
}
#else