pan/va: Implement v15 preload registers

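With the change in register count on v15, the preloaded registers move from r48-r63 to r0-r15.
Update the shader descriptor preload field, the register allocator affinity masks and bi_preload_reg() to reflect this.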
This commit is contained in:
Lars-Ivar Hesselberg Simonsen 2026-03-13 14:36:27 +01:00
parent ad81596b6d
commit fae53403de
8 changed files with 64 additions and 54 deletions

View file

@ -4458,12 +4458,13 @@ prepare_shader(struct panfrost_compiled_shader *state,
#endif
#if PAN_ARCH >= 15
cfg.register_count = state->info.work_reg_count;
cfg.preload.r0_r15 = state->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(state->info.work_reg_count);
cfg.preload.r48_r63 = (state->info.preload >> 48);
#endif
cfg.binary = state->bin.gpu;
cfg.preload.r48_r63 = (state->info.preload >> 48);
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
@ -4481,12 +4482,13 @@ prepare_shader(struct panfrost_compiled_shader *state,
#endif
#if PAN_ARCH >= 15
cfg.register_count = state->info.work_reg_count;
cfg.preload.r0_r15 = state->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(state->info.work_reg_count);
cfg.preload.r48_r63 = (state->info.preload >> 48);
#endif
cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset;
cfg.preload.r48_r63 = (state->info.preload >> 48);
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
}

View file

@ -1107,11 +1107,12 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
#if PAN_ARCH >= 15
cfg.register_count = preload_shader->info.work_reg_count;
cfg.preload.r0_r15 = preload_shader->info.preload;
#else
cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
cfg.preload.r48_r63 = preload_shader->info.preload >> 48;
#endif
cfg.binary = preload_shader->address;
cfg.preload.r48_r63 = preload_shader->info.preload >> 48;
}
unsigned bd_count = views.rt_count;

View file

@ -100,12 +100,13 @@ panfrost_precomp_shader_create(
cfg.stage = pan_shader_stage(&res->info);
#if PAN_ARCH >= 15
cfg.register_count = res->info.work_reg_count;
cfg.preload.r0_r15 = res->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(res->info.work_reg_count);
cfg.preload.r48_r63 = (res->info.preload >> 48);
#endif
cfg.binary = res->code_ptr;
cfg.preload.r48_r63 = (res->info.preload >> 48);
cfg.flush_to_zero_mode = panfrost_ftz_mode(&res->info);
}

View file

@ -294,7 +294,8 @@ bi_compute_liveness_ra(bi_context *ctx)
#define EVEN_BITS_MASK (0x5555555555555555ull)
static uint64_t
bi_make_affinity(uint64_t clobber, unsigned count, bool split_file)
bi_make_affinity(uint64_t clobber, unsigned count, bool split_file,
unsigned arch)
{
uint64_t clobbered = 0;
@ -308,12 +309,12 @@ bi_make_affinity(uint64_t clobber, unsigned count, bool split_file)
clobbered |= mask << (64 - excess);
if (split_file)
clobbered |= mask << (16 - excess);
clobbered |= mask << (((arch >= 15) ? 32 : 16) - excess);
}
/* Don't allocate the middle if we split out the middle */
if (split_file)
clobbered |= BITFIELD64_MASK(32) << 16;
clobbered |= BITFIELD64_MASK(32) << ((arch >= 15) ? 32 : 16);
/* We can use a register iff it's not clobbered */
return ~clobbered;
@ -341,7 +342,7 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live,
unsigned count = bi_count_write_registers(ins, d);
unsigned offset = ins->dest[d].offset;
uint64_t affinity =
bi_make_affinity(preload_live, count, split_file) >> offset;
bi_make_affinity(preload_live, count, split_file, arch) >> offset;
/* Valhall needs >= 64-bit staging writes to be pair-aligned */
if (aligned_sr && (count >= 2 || offset))
affinity &= EVEN_BITS_MASK;
@ -435,7 +436,8 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs)
uint64_t default_affinity =
ctx->inputs->is_blend ? BITFIELD64_MASK(16)
: full_regs ? BITFIELD64_MASK(64)
: (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48));
: (ctx->arch >= 15) ? BITFIELD64_MASK(32)
: (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48));
/* To test spilling, mimic a small register file */
if (bifrost_debug & BIFROST_DBG_SPILL && !ctx->inputs->is_blend && (bifrost_debug & BIFROST_DBG_NOSSARA))

View file

@ -1162,25 +1162,25 @@ bi_preload_reg(enum bi_preload val, unsigned arch)
/* Compute */
case BI_PRELOAD_LOCAL_ID_0:
/* Bits [15;0] */
return 55;
return (arch >= 15) ? 4 : 55;
case BI_PRELOAD_LOCAL_ID_1:
/* Bits [31;16] */
return 55;
return (arch >= 15) ? 4 : 55;
case BI_PRELOAD_LOCAL_ID_2:
/* Bits [15;0] */
return 56;
return (arch >= 15) ? 3 : 56;
case BI_PRELOAD_WORKGROUP_ID_0:
return 57;
return (arch >= 15) ? 5 : 57;
case BI_PRELOAD_WORKGROUP_ID_1:
return 58;
return (arch >= 15) ? 6 : 58;
case BI_PRELOAD_WORKGROUP_ID_2:
return 59;
return (arch >= 15) ? 7 : 59;
case BI_PRELOAD_GLOBAL_ID_0:
return 60;
return (arch >= 15) ? 0 : 60;
case BI_PRELOAD_GLOBAL_ID_1:
return 61;
return (arch >= 15) ? 1 : 61;
case BI_PRELOAD_GLOBAL_ID_2:
return 62;
return (arch >= 15) ? 2 : 62;
/* Vertex */
case BI_PRELOAD_POS_RESULT_PTR_LO:
assert(arch < 9);
@ -1190,58 +1190,58 @@ bi_preload_reg(enum bi_preload val, unsigned arch)
return 59;
case BI_PRELOAD_INTERNAL_ID:
assert(arch >= 9);
return 59;
return (arch >= 15) ? 2 : 59;
case BI_PRELOAD_VERTEX_ID:
return (arch >= 9) ? 60 : 61;
return (arch >= 15) ? 0 : (arch >= 9) ? 60 : 61;
case BI_PRELOAD_INSTANCE_ID:
return (arch >= 9) ? 61 : 62;
return (arch >= 15) ? 1 : (arch >= 9) ? 61 : 62;
case BI_PRELOAD_DRAW_ID:
assert(arch >= 9);
return 62;
return (arch >= 15) ? 3 : 62;
case BI_PRELOAD_VIEW_ID:
assert(arch >= 9);
return 63;
return (arch >= 15) ? 4 : 63;
/* Fragment */
case BI_PRELOAD_PRIMITIVE_ID:
return 57;
return (arch >= 15) ? 6 : 57;
case BI_PRELOAD_PRIMITIVE_FLAGS:
return 58;
return (arch >= 15) ? 3 : 58;
case BI_PRELOAD_POSITION_XY:
return 59;
return (arch >= 15) ? 2 : 59;
case BI_PRELOAD_CUMULATIVE_COVERAGE:
/* Bits [15;0] */
return 60;
return (arch >= 15) ? 0 : 60;
case BI_PRELOAD_RASTERIZER_COVERAGE:
/* Bits [15;0] */
return 61;
return (arch >= 15) ? 1 : 61;
case BI_PRELOAD_SAMPLE_ID:
/* Bits [23;16] */
return 61;
return (arch >= 15) ? 0 : 61;
case BI_PRELOAD_CENTROID_ID:
/* Bits [31;24] */
return 61;
return (arch >= 15) ? 0 : 61;
case BI_PRELOAD_FRAME_ARG:
/* Double reg */
return 62;
return (arch >= 15) ? 4 : 62;
/* Blend */
case BI_PRELOAD_BLEND_SRC0_C0:
return 0;
return (arch >= 15) ? 8 : 0;
case BI_PRELOAD_BLEND_SRC0_C1:
return 1;
return (arch >= 15) ? 9 : 1;
case BI_PRELOAD_BLEND_SRC0_C2:
return 2;
return (arch >= 15) ? 10 : 2;
case BI_PRELOAD_BLEND_SRC0_C3:
return 3;
return (arch >= 15) ? 11 : 3;
case BI_PRELOAD_BLEND_SRC1_C0:
return 4;
return (arch >= 15) ? 12 : 4;
case BI_PRELOAD_BLEND_SRC1_C1:
return 5;
return (arch >= 15) ? 13 : 5;
case BI_PRELOAD_BLEND_SRC1_C2:
return 6;
return (arch >= 15) ? 14 : 6;
case BI_PRELOAD_BLEND_SRC1_C3:
return 7;
return (arch >= 15) ? 15 : 7;
case BI_PRELOAD_BLEND_LINK:
return 48;
return (arch >= 15) ? 7 : 48;
}
UNREACHABLE("Non-handled BI_PRELOAD");
}

View file

@ -2016,16 +2016,16 @@
</enum>
<struct name="Preload" size="1">
<field name="R48-R63" size="16" start="0:0" type="hex"/>
<field name="R55" size="1" start="0:7" type="bool"/>
<field name="R56" size="1" start="0:8" type="bool"/>
<field name="R57" size="1" start="0:9" type="bool"/>
<field name="R58" size="1" start="0:10" type="bool"/>
<field name="R59" size="1" start="0:11" type="bool"/>
<field name="R60" size="1" start="0:12" type="bool"/>
<field name="R61" size="1" start="0:13" type="bool"/>
<field name="R62" size="1" start="0:14" type="bool"/>
<field name="R63" size="1" start="0:15" type="bool"/>
<field name="R0-R15" size="16" start="0:0" type="hex"/>
<field name="R0" size="1" start="0:0" type="bool"/>
<field name="R1" size="1" start="0:1" type="bool"/>
<field name="R2" size="1" start="0:2" type="bool"/>
<field name="R3" size="1" start="0:3" type="bool"/>
<field name="R4" size="1" start="0:4" type="bool"/>
<field name="R5" size="1" start="0:5" type="bool"/>
<field name="R6" size="1" start="0:6" type="bool"/>
<field name="R7" size="1" start="0:7" type="bool"/>
<field name="R8" size="1" start="0:8" type="bool"/>
</struct>
<enum name="Coverage bitmask type">

View file

@ -241,12 +241,13 @@ get_frame_shader(struct panvk_device *dev,
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
#if PAN_ARCH >= 15
cfg.register_count = shader->info.work_reg_count;
cfg.preload.r0_r15 = shader->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
cfg.preload.r48_r63 = shader->info.preload >> 48;
#endif
cfg.binary = panvk_priv_mem_dev_addr(shader->code_mem);
cfg.preload.r48_r63 = shader->info.preload >> 48;
}
#endif

View file

@ -1182,12 +1182,13 @@ panvk_shader_upload(struct panvk_device *dev,
#if PAN_ARCH >= 15
cfg.register_count = shader->info.work_reg_count;
cfg.preload.r0_r15 = shader->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
cfg.preload.r48_r63 = (shader->info.preload >> 48);
#endif
cfg.binary = panvk_shader_variant_get_dev_addr(shader);
cfg.preload.r48_r63 = (shader->info.preload >> 48);
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
@ -1205,12 +1206,13 @@ panvk_shader_upload(struct panvk_device *dev,
cfg.stage = pan_shader_stage(&shader->info);
#if PAN_ARCH >= 15
cfg.register_count = shader->info.work_reg_count;
cfg.preload.r0_r15 = shader->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
cfg.preload.r48_r63 = (shader->info.preload >> 48);
#endif
cfg.binary = panvk_shader_variant_get_dev_addr(shader);
cfg.preload.r48_r63 = (shader->info.preload >> 48);
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
}
@ -1224,13 +1226,14 @@ panvk_shader_upload(struct panvk_device *dev,
cfg.stage = pan_shader_stage(&shader->info);
#if PAN_ARCH >= 15
cfg.register_count = shader->info.work_reg_count;
cfg.preload.r0_r15 = shader->info.preload;
#else
cfg.register_allocation =
pan_register_allocation(shader->info.work_reg_count);
cfg.preload.r48_r63 = (shader->info.preload >> 48);
#endif
cfg.binary = panvk_shader_variant_get_dev_addr(shader) +
shader->info.vs.no_psiz_offset;
cfg.preload.r48_r63 = (shader->info.preload >> 48);
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
}
#else