diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 15680c000bc..efd9d22e338 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -4458,12 +4458,13 @@ prepare_shader(struct panfrost_compiled_shader *state, #endif #if PAN_ARCH >= 15 cfg.register_count = state->info.work_reg_count; + cfg.preload.r0_r15 = state->info.preload; #else cfg.register_allocation = pan_register_allocation(state->info.work_reg_count); + cfg.preload.r48_r63 = (state->info.preload >> 48); #endif cfg.binary = state->bin.gpu; - cfg.preload.r48_r63 = (state->info.preload >> 48); cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) @@ -4481,12 +4482,13 @@ prepare_shader(struct panfrost_compiled_shader *state, #endif #if PAN_ARCH >= 15 cfg.register_count = state->info.work_reg_count; + cfg.preload.r0_r15 = state->info.preload; #else cfg.register_allocation = pan_register_allocation(state->info.work_reg_count); + cfg.preload.r48_r63 = (state->info.preload >> 48); #endif cfg.binary = state->bin.gpu + state->info.vs.no_psiz_offset; - cfg.preload.r48_r63 = (state->info.preload >> 48); cfg.flush_to_zero_mode = panfrost_ftz_mode(&state->info); } diff --git a/src/gallium/drivers/panfrost/pan_fb_preload.c b/src/gallium/drivers/panfrost/pan_fb_preload.c index be4c2c9965e..6f1bba8ab66 100644 --- a/src/gallium/drivers/panfrost/pan_fb_preload.c +++ b/src/gallium/drivers/panfrost/pan_fb_preload.c @@ -1107,11 +1107,12 @@ pan_preload_emit_dcd(struct pan_fb_preload_cache *cache, struct pan_pool *pool, cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL; #if PAN_ARCH >= 15 cfg.register_count = preload_shader->info.work_reg_count; + cfg.preload.r0_r15 = preload_shader->info.preload; #else cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD; + cfg.preload.r48_r63 = preload_shader->info.preload >> 48; #endif cfg.binary = preload_shader->address; - cfg.preload.r48_r63 = preload_shader->info.preload >> 48; } unsigned bd_count = views.rt_count; diff --git a/src/gallium/drivers/panfrost/pan_precomp.c b/src/gallium/drivers/panfrost/pan_precomp.c index 9ca77b32dd7..bc4640d7e2a 100644 --- a/src/gallium/drivers/panfrost/pan_precomp.c +++ b/src/gallium/drivers/panfrost/pan_precomp.c @@ -100,12 +100,13 @@ panfrost_precomp_shader_create( cfg.stage = pan_shader_stage(&res->info); #if PAN_ARCH >= 15 cfg.register_count = res->info.work_reg_count; + cfg.preload.r0_r15 = res->info.preload; #else cfg.register_allocation = pan_register_allocation(res->info.work_reg_count); + cfg.preload.r48_r63 = (res->info.preload >> 48); #endif cfg.binary = res->code_ptr; - cfg.preload.r48_r63 = (res->info.preload >> 48); cfg.flush_to_zero_mode = panfrost_ftz_mode(&res->info); } diff --git a/src/panfrost/compiler/bifrost/bi_ra.c b/src/panfrost/compiler/bifrost/bi_ra.c index f8579f8c983..7f058bf0d3f 100644 --- a/src/panfrost/compiler/bifrost/bi_ra.c +++ b/src/panfrost/compiler/bifrost/bi_ra.c @@ -294,7 +294,8 @@ bi_compute_liveness_ra(bi_context *ctx) #define EVEN_BITS_MASK (0x5555555555555555ull) static uint64_t -bi_make_affinity(uint64_t clobber, unsigned count, bool split_file) +bi_make_affinity(uint64_t clobber, unsigned count, bool split_file, + unsigned arch) { uint64_t clobbered = 0; @@ -308,12 +309,12 @@ bi_make_affinity(uint64_t clobber, unsigned count, bool split_file) clobbered |= mask << (64 - excess); if (split_file) - clobbered |= mask << (16 - excess); + clobbered |= mask << (((arch >= 15) ? 32 : 16) - excess); } /* Don't allocate the middle if we split out the middle */ if (split_file) - clobbered |= BITFIELD64_MASK(32) << 16; + clobbered |= BITFIELD64_MASK(32) << ((arch >= 15) ? 32 : 16); /* We can use a register iff it's not clobberred */ return ~clobbered; @@ -341,7 +342,7 @@ bi_mark_interference(bi_block *block, struct lcra_state *l, uint8_t *live, unsigned count = bi_count_write_registers(ins, d); unsigned offset = ins->dest[d].offset; uint64_t affinity = - bi_make_affinity(preload_live, count, split_file) >> offset; + bi_make_affinity(preload_live, count, split_file, arch) >> offset; /* Valhall needs >= 64-bit staging writes to be pair-aligned */ if (aligned_sr && (count >= 2 || offset)) affinity &= EVEN_BITS_MASK; @@ -435,7 +436,8 @@ bi_allocate_registers(bi_context *ctx, bool *success, bool full_regs) uint64_t default_affinity = ctx->inputs->is_blend ? BITFIELD64_MASK(16) : full_regs ? BITFIELD64_MASK(64) - : (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48)); + : (ctx->arch >= 15) ? BITFIELD64_MASK(32) + : (BITFIELD64_MASK(16) | (BITFIELD64_MASK(16) << 48)); /* To test spilling, mimic a small register file */ if (bifrost_debug & BIFROST_DBG_SPILL && !ctx->inputs->is_blend && (bifrost_debug & BIFROST_DBG_NOSSARA)) diff --git a/src/panfrost/compiler/bifrost/compiler.h b/src/panfrost/compiler/bifrost/compiler.h index 39b4f8d09a0..20b553f43da 100644 --- a/src/panfrost/compiler/bifrost/compiler.h +++ b/src/panfrost/compiler/bifrost/compiler.h @@ -1162,25 +1162,25 @@ bi_preload_reg(enum bi_preload val, unsigned arch) /* Compute */ case BI_PRELOAD_LOCAL_ID_0: /* Bits [15;0] */ - return 55; + return (arch >= 15) ? 4 : 55; case BI_PRELOAD_LOCAL_ID_1: /* Bits [31;16] */ - return 55; + return (arch >= 15) ? 4 : 55; case BI_PRELOAD_LOCAL_ID_2: /* Bits [15;0] */ - return 56; + return (arch >= 15) ? 3 : 56; case BI_PRELOAD_WORKGROUP_ID_0: - return 57; + return (arch >= 15) ? 5 : 57; case BI_PRELOAD_WORKGROUP_ID_1: - return 58; + return (arch >= 15) ? 6 : 58; case BI_PRELOAD_WORKGROUP_ID_2: - return 59; + return (arch >= 15) ? 7 : 59; case BI_PRELOAD_GLOBAL_ID_0: - return 60; + return (arch >= 15) ? 0 : 60; case BI_PRELOAD_GLOBAL_ID_1: - return 61; + return (arch >= 15) ? 1 : 61; case BI_PRELOAD_GLOBAL_ID_2: - return 62; + return (arch >= 15) ? 2 : 62; /* Vertex */ case BI_PRELOAD_POS_RESULT_PTR_LO: assert(arch < 9); @@ -1190,58 +1190,58 @@ bi_preload_reg(enum bi_preload val, unsigned arch) return 59; case BI_PRELOAD_INTERNAL_ID: assert(arch >= 9); - return 59; + return (arch >= 15) ? 2 : 59; case BI_PRELOAD_VERTEX_ID: - return (arch >= 9) ? 60 : 61; + return (arch >= 15) ? 0 : (arch >= 9) ? 60 : 61; case BI_PRELOAD_INSTANCE_ID: - return (arch >= 9) ? 61 : 62; + return (arch >= 15) ? 1 : (arch >= 9) ? 61 : 62; case BI_PRELOAD_DRAW_ID: assert(arch >= 9); - return 62; + return (arch >= 15) ? 3 : 62; case BI_PRELOAD_VIEW_ID: assert(arch >= 9); - return 63; + return (arch >= 15) ? 4 : 63; /* Fragment */ case BI_PRELOAD_PRIMITIVE_ID: - return 57; + return (arch >= 15) ? 6 : 57; case BI_PRELOAD_PRIMITIVE_FLAGS: - return 58; + return (arch >= 15) ? 3 : 58; case BI_PRELOAD_POSITION_XY: - return 59; + return (arch >= 15) ? 2 : 59; case BI_PRELOAD_CUMULATIVE_COVERAGE: /* Bits [15;0] */ - return 60; + return (arch >= 15) ? 0 : 60; case BI_PRELOAD_RASTERIZER_COVERAGE: /* Bits [15;0] */ - return 61; + return (arch >= 15) ? 1 : 61; case BI_PRELOAD_SAMPLE_ID: /* Bits [23;16] */ - return 61; + return (arch >= 15) ? 0 : 61; case BI_PRELOAD_CENTROID_ID: /* Bits [31;24] */ - return 61; + return (arch >= 15) ? 0 : 61; case BI_PRELOAD_FRAME_ARG: /* Double reg */ - return 62; + return (arch >= 15) ? 4 : 62; /* Blend */ case BI_PRELOAD_BLEND_SRC0_C0: - return 0; + return (arch >= 15) ? 8 : 0; case BI_PRELOAD_BLEND_SRC0_C1: - return 1; + return (arch >= 15) ? 9 : 1; case BI_PRELOAD_BLEND_SRC0_C2: - return 2; + return (arch >= 15) ? 10 : 2; case BI_PRELOAD_BLEND_SRC0_C3: - return 3; + return (arch >= 15) ? 11 : 3; case BI_PRELOAD_BLEND_SRC1_C0: - return 4; + return (arch >= 15) ? 12 : 4; case BI_PRELOAD_BLEND_SRC1_C1: - return 5; + return (arch >= 15) ? 13 : 5; case BI_PRELOAD_BLEND_SRC1_C2: - return 6; + return (arch >= 15) ? 14 : 6; case BI_PRELOAD_BLEND_SRC1_C3: - return 7; + return (arch >= 15) ? 15 : 7; case BI_PRELOAD_BLEND_LINK: - return 48; + return (arch >= 15) ? 7 : 48; } UNREACHABLE("Non-handled BI_PRELOAD"); } diff --git a/src/panfrost/genxml/v15.xml b/src/panfrost/genxml/v15.xml index 983834f16e4..f1a2bc7e2c9 100644 --- a/src/panfrost/genxml/v15.xml +++ b/src/panfrost/genxml/v15.xml @@ -2016,16 +2016,16 @@ - - - - - - - - - - + + + + + + + + + + diff --git a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c index afa2692bde0..d687fa1d6cf 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_frame_shaders.c @@ -241,12 +241,13 @@ get_frame_shader(struct panvk_device *dev, cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL; #if PAN_ARCH >= 15 cfg.register_count = shader->info.work_reg_count; + cfg.preload.r0_r15 = shader->info.preload; #else cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); + cfg.preload.r48_r63 = shader->info.preload >> 48; #endif cfg.binary = panvk_priv_mem_dev_addr(shader->code_mem); - cfg.preload.r48_r63 = shader->info.preload >> 48; } #endif diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 927a72cd7c9..639288e6e01 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -1182,12 +1182,13 @@ panvk_shader_upload(struct panvk_device *dev, #if PAN_ARCH >= 15 cfg.register_count = shader->info.work_reg_count; + cfg.preload.r0_r15 = shader->info.preload; #else cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); + cfg.preload.r48_r63 = (shader->info.preload >> 48); #endif cfg.binary = panvk_shader_variant_get_dev_addr(shader); - cfg.preload.r48_r63 = (shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) @@ -1205,12 +1206,13 @@ panvk_shader_upload(struct panvk_device *dev, cfg.stage = pan_shader_stage(&shader->info); #if PAN_ARCH >= 15 cfg.register_count = shader->info.work_reg_count; + cfg.preload.r0_r15 = shader->info.preload; #else cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); + cfg.preload.r48_r63 = (shader->info.preload >> 48); #endif cfg.binary = panvk_shader_variant_get_dev_addr(shader); - cfg.preload.r48_r63 = (shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); } @@ -1224,13 +1226,14 @@ panvk_shader_upload(struct panvk_device *dev, cfg.stage = pan_shader_stage(&shader->info); #if PAN_ARCH >= 15 cfg.register_count = shader->info.work_reg_count; + cfg.preload.r0_r15 = shader->info.preload; #else cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); + cfg.preload.r48_r63 = (shader->info.preload >> 48); #endif cfg.binary = panvk_shader_variant_get_dev_addr(shader) + shader->info.vs.no_psiz_offset; - cfg.preload.r48_r63 = (shader->info.preload >> 48); cfg.flush_to_zero_mode = shader_ftz_mode(shader); } #else