From ac00e526eabec376f9ac3a1df4f6576017e65f0d Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 27 May 2026 09:28:11 -0400 Subject: [PATCH] jay: gate early EOT code behind =strict Works fine on Xe2 Linux and fixes us for Xe3. simd16: Totals from 2635 (99.55% of 2647) affected shaders: Instrs: 2722355 -> 2719307 (-0.11%) CodeSize: 40641680 -> 40605472 (-0.09%) simd32: Totals from 2629 (99.32% of 2647) affected shaders: Instrs: 4052325 -> 4049578 (-0.07%); split: -0.07%, +0.00% CodeSize: 60360016 -> 60329392 (-0.05%); split: -0.05%, +0.00% Signed-off-by: Alyssa Rosenzweig Part-of: --- src/intel/compiler/jay/jay_partition.c | 7 +++++++ src/intel/compiler/jay/jay_private.h | 19 +++++++++++++++++++ .../compiler/jay/jay_register_allocate.c | 8 ++++---- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/jay/jay_partition.c b/src/intel/compiler/jay/jay_partition.c index 629d22a1795..53a0fa6fd1a 100644 --- a/src/intel/compiler/jay/jay_partition.c +++ b/src/intel/compiler/jay/jay_partition.c @@ -187,6 +187,13 @@ jay_partition_grf(jay_shader *shader) eot_u = 1; } + /* EOT blocks are only relevant for platforms with Early EOT, otherwise any + * register works fine. + */ + if (!jay_has_early_eot(shader)) { + eot_4 = eot_u = 0; + } + unsigned special_u = payload_u[0] + payload_u[1] + spill_reservation + eot_u; unsigned special_4 = payload_4[0] + payload_4[1] + eot_4; diff --git a/src/intel/compiler/jay/jay_private.h b/src/intel/compiler/jay/jay_private.h index 29100310d88..14cf68655ac 100644 --- a/src/intel/compiler/jay/jay_private.h +++ b/src/intel/compiler/jay/jay_private.h @@ -98,6 +98,25 @@ jay_gpr_limit(jay_shader *shader) return test ? 13 : shader->num_regs[GPR]; } +/* + * Check whether the Early EOT feature is possibly enabled. This feature was + * removed in Xe3+. It exists on Xe2 and fulsim enables it but real hardware + * under xe.ko does not, so we gate on strict mode there. Pre-Xe2, it is always + * enabled right now. 
+ */ +static inline bool +jay_has_early_eot(jay_shader *s) +{ + return (s->devinfo->ver == 20 && (jay_debug & JAY_DBG_STRICT)) || + (s->devinfo->ver < 20); +} + +static inline bool +jay_is_early_eot_send(jay_shader *s, const jay_inst *I) +{ + return I->op == JAY_OPCODE_SEND && jay_send_eot(I) && jay_has_early_eot(s); +} + #ifdef __cplusplus } /* extern C */ #endif diff --git a/src/intel/compiler/jay/jay_register_allocate.c b/src/intel/compiler/jay/jay_register_allocate.c index 19c366107c2..0b240f84bcc 100644 --- a/src/intel/compiler/jay/jay_register_allocate.c +++ b/src/intel/compiler/jay/jay_register_allocate.c @@ -177,8 +177,8 @@ struct affinity { signed offset:7; /** - * If true, this value is used in an end-of-thread SEND and requires high - * registers. + * If true, this value is used in an early end-of-thread SEND and requires + * high registers. */ bool eot:1; @@ -864,7 +864,7 @@ pick_regs(jay_ra_state *ra, bool is_src) { struct jay_partition *partition = &ra->b.shader->partition; - bool eot = I->op == JAY_OPCODE_SEND && jay_send_eot(I); + bool eot = jay_is_early_eot_send(ra->b.shader, I); /* If possible, keep sources in place to avoid shuffles. */ if (is_src && jay_channel(var, 0) != 0) { @@ -1465,7 +1465,7 @@ jay_register_allocate_function(jay_function *f) ra.affinities[index].nr = MIN2(jay_num_values(I->src[s]), 15); } - if (I->op == JAY_OPCODE_SEND && jay_send_eot(I)) { + if (jay_is_early_eot_send(shader, I)) { ra.affinities[index].eot = true; }