From bd634bef1237b3429731d70f508680ebd26e56f1 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 1 Dec 2023 17:01:36 -0800 Subject: [PATCH] intel/fs/xe2+: Implement layout of mesh shading per-primitive inputs in PS thread payloads. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is based on a previous patch by Marcin Ślusarz addressing the same issue, though it's largely rewritten, simplified and includes additional fixes. Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_fs.cpp | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 992abc5dffd..bdccc215be8 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1797,29 +1797,43 @@ fs_visitor::assign_urb_setup() struct brw_reg reg; assert(max_polygons > 0); + /* Calculate the base register on the thread payload of + * either the block of vertex setup data or the block of + * per-primitive constant data depending on whether we're + * accessing a primitive or vertex input. Also calculate + * the index of the input within that block. + */ + const bool per_prim = inst->src[i].nr < prog_data->num_per_primitive_inputs; + const unsigned base = urb_start + + (per_prim ? 0 : + ALIGN(prog_data->num_per_primitive_inputs / 2, + reg_unit(devinfo)) * max_polygons); + const unsigned idx = per_prim ? inst->src[i].nr : + inst->src[i].nr - prog_data->num_per_primitive_inputs; + /* Translate the offset within the param_width-wide * representation described above into an offset and a * grf, which contains the plane parameters for the first * polygon processed by the thread. */ - if (devinfo->ver >= 20) { + if (devinfo->ver >= 20 && !per_prim) { /* Gfx20+ is able to pack 5 logical input components - * per 64B register. + * per 64B register for vertex setup data. 
*/ - const unsigned grf = urb_start + inst->src[i].nr / 5 * 2 * max_polygons; + const unsigned grf = base + idx / 5 * 2 * max_polygons; assert(inst->src[i].offset / param_width < 12); - const unsigned delta = inst->src[i].nr % 5 * 12 + + const unsigned delta = idx % 5 * 12 + inst->src[i].offset / (param_width * chan_sz) * chan_sz + inst->src[i].offset % chan_sz; reg = byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type), delta); } else { - /* Earlier platforms pack 2 logical input components - * per 32B register. + /* Earlier platforms and per-primitive block pack 2 logical + * input components per 32B register. */ - const unsigned grf = urb_start + inst->src[i].nr / 2 * max_polygons; + const unsigned grf = base + idx / 2 * max_polygons; assert(inst->src[i].offset / param_width < REG_SIZE / 2); - const unsigned delta = (inst->src[i].nr % 2) * (REG_SIZE / 2) + + const unsigned delta = (idx % 2) * (REG_SIZE / 2) + inst->src[i].offset / (param_width * chan_sz) * chan_sz + inst->src[i].offset % chan_sz; reg = byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),