mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
pan/va: Implement v15 register count changes
With v15, shaders can use up to 128 registers, allocated in any multiple of 16 (previously the only choice was between 32-register and 64-register mode). To support this, the shader register count is passed to the hardware differently starting with v15, which requires updating how we encode the ShaderProgramDescriptor and the ShaderProgramPointer. Note that this does not yet change the compiler, which still runs in either 32- or 64-register mode; it only changes how the register count is passed to the GPU.
This commit is contained in:
parent
b7afb629c3
commit
ad81596b6d
9 changed files with 97 additions and 7 deletions
|
|
@ -4456,9 +4456,12 @@ prepare_shader(struct panfrost_compiled_shader *state,
|
|||
else if (vs)
|
||||
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 15
|
||||
cfg.register_count = state->info.work_reg_count;
|
||||
#else
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(state->info.work_reg_count);
|
||||
#endif
|
||||
cfg.binary = state->bin.gpu;
|
||||
cfg.preload.r48_r63 = (state->info.preload >> 48);
|
||||
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
|
||||
|
|
@ -4476,8 +4479,12 @@ prepare_shader(struct panfrost_compiled_shader *state,
|
|||
#if PAN_ARCH < 12
|
||||
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
|
||||
#endif
|
||||
#if PAN_ARCH >= 15
|
||||
cfg.register_count = state->info.work_reg_count;
|
||||
#else
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(state->info.work_reg_count);
|
||||
#endif
|
||||
cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset;
|
||||
cfg.preload.r48_r63 = (state->info.preload >> 48);
|
||||
cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info);
|
||||
|
|
|
|||
|
|
@ -1105,7 +1105,11 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool,
|
|||
pan_cast_and_pack(spd.cpu, SHADER_PROGRAM, cfg) {
|
||||
cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
|
||||
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
|
||||
#if PAN_ARCH >= 15
|
||||
cfg.register_count = preload_shader->info.work_reg_count;
|
||||
#else
|
||||
cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
|
||||
#endif
|
||||
cfg.binary = preload_shader->address;
|
||||
cfg.preload.r48_r63 = preload_shader->info.preload >> 48;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -98,8 +98,12 @@ panfrost_precomp_shader_create(
|
|||
|
||||
pan_cast_and_pack(spd.cpu, SHADER_PROGRAM, cfg) {
|
||||
cfg.stage = pan_shader_stage(&res->info);
|
||||
#if PAN_ARCH >= 15
|
||||
cfg.register_count = res->info.work_reg_count;
|
||||
#else
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(res->info.work_reg_count);
|
||||
#endif
|
||||
cfg.binary = res->code_ptr;
|
||||
cfg.preload.r48_r63 = (res->info.preload >> 48);
|
||||
cfg.flush_to_zero_mode = panfrost_ftz_mode(&res->info);
|
||||
|
|
@ -326,7 +330,17 @@ GENX(panfrost_launch_precomp)(struct panfrost_batch *batch,
|
|||
uint64_t fau_ptr = push_uniforms.gpu | (fau_count << 56);
|
||||
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, FAU_0), fau_ptr);
|
||||
|
||||
#if PAN_ARCH >= 15
|
||||
struct mali_shader_program_pointer_packed spp;
|
||||
pan_pack(&spp, SHADER_PROGRAM_POINTER, ctx) {
|
||||
ctx.register_count = shader->info.work_reg_count;
|
||||
ctx.pointer = shader->state_ptr;
|
||||
}
|
||||
uint64_t ptr = ((uint64_t)spp.opaque[1] << 32) | spp.opaque[0];
|
||||
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0), ptr);
|
||||
#else
|
||||
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, SPD_0), shader->state_ptr);
|
||||
#endif
|
||||
cs_move64_to(b, cs_sr_reg64(b, COMPUTE, TSD_0), tsd);
|
||||
|
||||
/* Global attribute offset */
|
||||
|
|
|
|||
|
|
@ -651,8 +651,19 @@ pandecode_run_compute(struct pandecode_context *ctx, FILE *fp,
|
|||
if (fau)
|
||||
GENX(pandecode_fau)(ctx, fau & BITFIELD64_MASK(48), fau >> 56, "FAU");
|
||||
|
||||
GENX(pandecode_shader)
|
||||
(ctx, cs_get_u64(qctx, reg_spd), "Shader", qctx->gpu_id);
|
||||
uint64_t addr = cs_get_u64(qctx, reg_spd);
|
||||
#if PAN_ARCH >= 15
|
||||
const struct mali_shader_program_pointer_packed spp_packed = {
|
||||
.opaque[0] = addr & 0xFFFFFFFF,
|
||||
.opaque[1] = (addr >> 32) & 0xFFFFFFFF,
|
||||
};
|
||||
pan_unpack(&spp_packed, SHADER_PROGRAM_POINTER, spp)
|
||||
;
|
||||
DUMP_UNPACKED(ctx, SHADER_PROGRAM_POINTER, spp,
|
||||
"Shader Program Pointer (%" PRIx64 "):\n", addr);
|
||||
addr = spp.pointer;
|
||||
#endif
|
||||
GENX(pandecode_shader)(ctx, addr, "Shader", qctx->gpu_id);
|
||||
|
||||
DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_tsd),
|
||||
"Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_tsd));
|
||||
|
|
@ -693,8 +704,19 @@ pandecode_run_compute_indirect(struct pandecode_context *ctx, FILE *fp,
|
|||
if (fau)
|
||||
GENX(pandecode_fau)(ctx, fau & BITFIELD64_MASK(48), fau >> 56, "FAU");
|
||||
|
||||
GENX(pandecode_shader)
|
||||
(ctx, cs_get_u64(qctx, reg_spd), "Shader", qctx->gpu_id);
|
||||
uint64_t addr = cs_get_u64(qctx, reg_spd);
|
||||
#if PAN_ARCH >= 15
|
||||
const struct mali_shader_program_pointer_packed spp_packed = {
|
||||
.opaque[0] = addr & 0xFFFFFFFF,
|
||||
.opaque[1] = (addr >> 32) & 0xFFFFFFFF,
|
||||
};
|
||||
pan_unpack(&spp_packed, SHADER_PROGRAM_POINTER, spp)
|
||||
;
|
||||
DUMP_UNPACKED(ctx, SHADER_PROGRAM_POINTER, spp,
|
||||
"Shader Program Pointer (%" PRIx64 "):\n", addr);
|
||||
addr = spp.pointer;
|
||||
#endif
|
||||
GENX(pandecode_shader)(ctx, addr, "Shader", qctx->gpu_id);
|
||||
|
||||
DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_tsd),
|
||||
"Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_tsd));
|
||||
|
|
|
|||
|
|
@ -2040,14 +2040,20 @@
|
|||
<field name="Suppress NaN" size="1" start="0:16" type="bool"/>
|
||||
<field name="Flush to zero mode" size="2" start="0:17" type="Flush to zero mode"/>
|
||||
<field name="Suppress Inf" size="1" start="0:19" type="bool"/>
|
||||
<field name="Register Count" size="5" start="0:20" type="uint" modifier="align(16) shr(3) minus(1)"/>
|
||||
<field name="Requires helper threads" size="1" start="0:28" type="bool"/> <!-- Fragment only -->
|
||||
<field name="Shader contains JUMP_EX" size="1" start="0:29" type="bool"/>
|
||||
<field name="Register allocation" size="2" start="0:30" type="Shader Register Allocation"/>
|
||||
<field name="Preload" size="16" start="1:0" type="Preload"/>
|
||||
<field name="Max Warps" size="16" start="1:16" type="uint"/>
|
||||
<field name="Binary" size="64" start="2:0" type="address"/>
|
||||
</struct>
|
||||
|
||||
<!-- Only used by RUN_COMPUTE -->
|
||||
<struct name="Shader Program Pointer" size="2" align="8">
|
||||
<field name="Register Count" size="5" start="0:0" type="uint" modifier="align(16) shr(3) minus(1)"/>
|
||||
<field name="Pointer" size="51" start="0:5" type="address" modifier="shr(5)"/>
|
||||
</struct>
|
||||
|
||||
<struct name="Scissor">
|
||||
<field name="Scissor Minimum X" size="16" start="0:0" type="uint"/>
|
||||
<field name="Scissor Minimum Y" size="16" start="0:16" type="uint"/>
|
||||
|
|
|
|||
|
|
@ -209,9 +209,20 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
|
|||
cs_move64_to(b, cs_reg64(b, PANVK_COMPUTE_FAU), fau_ptr);
|
||||
}
|
||||
|
||||
if (compute_state_dirty(cmdbuf, CS))
|
||||
if (compute_state_dirty(cmdbuf, CS)) {
|
||||
#if PAN_ARCH >= 15
|
||||
struct mali_shader_program_pointer_packed spp;
|
||||
pan_pack(&spp, SHADER_PROGRAM_POINTER, ctx) {
|
||||
ctx.register_count = cs->info.work_reg_count;
|
||||
ctx.pointer = panvk_priv_mem_dev_addr(cs->spd);
|
||||
}
|
||||
uint64_t ptr = ((uint64_t)spp.opaque[1] << 32) | spp.opaque[0];
|
||||
cs_move64_to(b, cs_reg64(b, PANVK_COMPUTE_SPD), ptr);
|
||||
#else
|
||||
cs_move64_to(b, cs_reg64(b, PANVK_COMPUTE_SPD),
|
||||
panvk_priv_mem_dev_addr(cs->spd));
|
||||
#endif
|
||||
}
|
||||
|
||||
cs_move64_to(b, cs_reg64(b, PANVK_COMPUTE_TSD), tsd);
|
||||
|
||||
|
|
|
|||
|
|
@ -82,8 +82,18 @@ panvk_per_arch(dispatch_precomp)(struct panvk_precomp_ctx *ctx,
|
|||
uint64_t fau_ptr = push_uniforms.gpu | (fau_count << 56);
|
||||
cs_move64_to(b, cs_reg64(b, PANVK_PRECOMP_FAU), fau_ptr);
|
||||
|
||||
#if PAN_ARCH >= 15
|
||||
struct mali_shader_program_pointer_packed spp;
|
||||
pan_pack(&spp, SHADER_PROGRAM_POINTER, ctx) {
|
||||
ctx.register_count = shader->info.work_reg_count;
|
||||
ctx.pointer = panvk_priv_mem_dev_addr(shader->spd);
|
||||
}
|
||||
uint64_t ptr = ((uint64_t)spp.opaque[1] << 32) | spp.opaque[0];
|
||||
cs_move64_to(b, cs_reg64(b, PANVK_PRECOMP_SPD), ptr);
|
||||
#else
|
||||
cs_move64_to(b, cs_reg64(b, PANVK_PRECOMP_SPD),
|
||||
panvk_priv_mem_dev_addr(shader->spd));
|
||||
#endif
|
||||
|
||||
cs_move64_to(b, cs_reg64(b, PANVK_PRECOMP_TSD), tsd);
|
||||
|
||||
|
|
|
|||
|
|
@ -239,8 +239,12 @@ get_frame_shader(struct panvk_device *dev,
|
|||
panvk_priv_mem_write_desc(shader->spd, 0, SHADER_PROGRAM, cfg) {
|
||||
cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
|
||||
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
|
||||
#if PAN_ARCH >= 15
|
||||
cfg.register_count = shader->info.work_reg_count;
|
||||
#else
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(shader->info.work_reg_count);
|
||||
#endif
|
||||
cfg.binary = panvk_priv_mem_dev_addr(shader->code_mem);
|
||||
cfg.preload.r48_r63 = shader->info.preload >> 48;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1180,8 +1180,12 @@ panvk_shader_upload(struct panvk_device *dev,
|
|||
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 15
|
||||
cfg.register_count = shader->info.work_reg_count;
|
||||
#else
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(shader->info.work_reg_count);
|
||||
#endif
|
||||
cfg.binary = panvk_shader_variant_get_dev_addr(shader);
|
||||
cfg.preload.r48_r63 = (shader->info.preload >> 48);
|
||||
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
|
||||
|
|
@ -1199,8 +1203,12 @@ panvk_shader_upload(struct panvk_device *dev,
|
|||
panvk_priv_mem_write_desc(shader->spds.all_points, 0, SHADER_PROGRAM,
|
||||
cfg) {
|
||||
cfg.stage = pan_shader_stage(&shader->info);
|
||||
#if PAN_ARCH >= 15
|
||||
cfg.register_count = shader->info.work_reg_count;
|
||||
#else
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(shader->info.work_reg_count);
|
||||
#endif
|
||||
cfg.binary = panvk_shader_variant_get_dev_addr(shader);
|
||||
cfg.preload.r48_r63 = (shader->info.preload >> 48);
|
||||
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
|
||||
|
|
@ -1214,8 +1222,12 @@ panvk_shader_upload(struct panvk_device *dev,
|
|||
panvk_priv_mem_write_desc(shader->spds.all_triangles, 0, SHADER_PROGRAM,
|
||||
cfg) {
|
||||
cfg.stage = pan_shader_stage(&shader->info);
|
||||
#if PAN_ARCH >= 15
|
||||
cfg.register_count = shader->info.work_reg_count;
|
||||
#else
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(shader->info.work_reg_count);
|
||||
#endif
|
||||
cfg.binary = panvk_shader_variant_get_dev_addr(shader) +
|
||||
shader->info.vs.no_psiz_offset;
|
||||
cfg.preload.r48_r63 = (shader->info.preload >> 48);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue