From 0670b400137ea5984477c89f7e47ee497ce463f1 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 11 May 2026 15:29:53 -0700 Subject: [PATCH] jay: Add comments summarizing the PS thread payload layout The documentation is large and hard to follow due to all the optional fields and the SIMD16 vs. SIMD32 split for barycentrics. This quick summary helps clarify what fields exist, which are split for SIMD32 or kept together, and which pairs of registers are involved for splits. Part-of: --- src/intel/compiler/jay/jay_from_nir.c | 59 +++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index 9c359f5fad5..f6a47870501 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -2503,6 +2503,50 @@ setup_compute_payload(struct nir_to_jay_state *nj, struct payload_builder *p) static void setup_fragment_payload(struct nir_to_jay_state *nj, struct payload_builder *p) { + /* Summarizing the "PS Thread Payload for Normal Dispatch" docs, the + * physical thread payload layout is as follows: + * + * UGPRs: + * R0: All modes + * R1: SIMD32-only (not present for SIMD16) + * + * Barycentrics (optional, see "Barycentric Interpolation Mode" bits): + * (lanes 15:0 in first register, lanes 31:16 in higher register) + * + * GPRs: + * R2+R23: pixel location[1] + * R3+R24: pixel location[2] + * R4+R25: centroid[1] + * R5+R26: centroid[2] + * R6+R27: sample[1] + * R7+R28: sample[2] + * R8+R29: noperspective pixel[1] + * R9+R30: noperspective pixel[2] + * R10+R31: noperspective centroid[1] + * R11+R32: noperspective centroid[2] + * R12+R33: noperspective sample[1] + * R13+R34: noperspective sample[2] + * + * R14+R35: Source Depth (optional) + * R15+R36: Source W (optional) + * R16+R37: Input Coverage Mask (optional) + * + * R17-R18: (defeatured) + * + * UGPRs: + * R19: Sample Position Offsets (optional, see "XY Offset Select") + * 32 
lanes in a single register, X/Y are 1 byte each. + * (e.g. lane 7 is at coordinate (X, Y)) + * R20: Centroid Position Offsets (see "Requires Centroid Offset") + * + * R21: Requested Coarse Pixel Shading Rate (optional) + * + * R22: Sample Offsets (optional, see "Requires Sample Offsets") + * (e.g. sample 4 is at subpixel coordinate (X, Y)) + * + * TODO: multipolygon, explicit barycentrics, ... + */ + jay_fs_payload *fs = &nj->payload.fs; if (nj->s->dispatch_width == 32) { @@ -2545,9 +2589,7 @@ setup_fragment_payload(struct nir_to_jay_state *nj, struct payload_builder *p) fs->coord.xy = jay_OFFSET_PACKED_PIXEL_COORDS_u32(&nj->bld, t); } - /* Due to complexities of the physical payload, the logical payload is split - * into even/odd halves. Fix up the offsets and insert copies. - */ + /* Renumber to match what jay_insert_payload_swizzle expects. */ if (nj->s->dispatch_width == 32) { jay_foreach_inst_in_block(nj->after_block, I) { if (I->op == JAY_OPCODE_PRELOAD && I->dst.file == GPR) { @@ -2558,6 +2600,17 @@ setup_fragment_payload(struct nir_to_jay_state *nj, struct payload_builder *p) } } +/* + * For SIMD32 dispatch, many fields come as pairs of discontiguous GRFs + * (e.g. R2+R23), where the first register contains lanes 15:0, and + * the higher register contains lanes 31:16. This doesn't map well to + * our assumption that GPRs hold 32 lanes of values and are stored in + * contiguous aligned pairs of GRFs. + * + * We insert copies to put both halves together. Payload fields have + * both an even-numbered and an odd-numbered register (e.g. R2+R23). + * We use some tricks to reduce the number of copies. + */ static void jay_insert_payload_swizzle(jay_shader *s) {