intel/fs/xe2+: Update for new layout of vertex setup data in PS payload.

The interpolation deltas of PS inputs now show up as a 12B vec3 (A0,
A1-A0, A2-A0) in the ATTR file, instead of the previously used 16B
format with an unused component.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26606>
This commit is contained in:
Francisco Jerez 2023-12-01 16:23:11 -08:00 committed by Caio Oliveira
parent d622e19f00
commit 702eabaaae
2 changed files with 58 additions and 19 deletions

View file

@ -1742,10 +1742,10 @@ fs_visitor::assign_urb_setup()
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
/* ATTR fs_reg::nr in the FS is in units of logical scalar
* inputs each of which consumes half of a GRF register on
* current platforms. In single polygon mode this leads
* to the following layout of the vertex setup plane
* parameters in the ATTR register file:
* inputs each of which consumes 16B on Gfx4-Gfx12. In
* single polygon mode this leads to the following layout
* of the vertex setup plane parameters in the ATTR
* register file:
*
* fs_reg::nr Input Comp0 Comp1 Comp2 Comp3
* 0 Attr0.x a1-a0 a2-a0 N/A a0
@ -1782,27 +1782,49 @@ fs_visitor::assign_urb_setup()
* The latter layout corresponds to a param_width equal to
* dispatch_width, while the former (scalar parameter)
* layout has a param_width of 1.
*
* Gfx20+ represents plane parameters in a format similar
* to the above, except that the parameters are packed in
* 12B and ordered as "a0, a1-a0, a2-a0" instead of the
* vec4 representation above with its unused component.
*/
const unsigned param_width = (max_polygons > 1 ? dispatch_width : 1);
assert(inst->src[i].offset / param_width < REG_SIZE / 2);
assert(max_polygons > 0);
/* Size of a single scalar component of a plane parameter
* in bytes.
*/
const unsigned chan_sz = 4;
struct brw_reg reg;
assert(max_polygons > 0);
/* Translate the offset within the param_width-wide
* representation described above into an offset into grf,
* which contains plane parameters for the first polygon
* handled by the thread.
* representation described above into an offset and a
* grf, which contains the plane parameters for the first
* polygon processed by the thread.
*/
const unsigned grf = urb_start + inst->src[i].nr / 2 * max_polygons;
const unsigned delta = (inst->src[i].nr % 2) * (REG_SIZE / 2) +
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
inst->src[i].offset % chan_sz;
struct brw_reg reg =
byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type), delta);
if (devinfo->ver >= 20) {
/* Gfx20+ is able to pack 5 logical input components
* per 64B register.
*/
const unsigned grf = urb_start + inst->src[i].nr / 5 * 2;
assert(inst->src[i].offset / param_width < 12);
const unsigned delta = inst->src[i].nr % 5 * 12 +
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
inst->src[i].offset % chan_sz;
reg = byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
delta);
} else {
/* Earlier platforms pack 2 logical input components
* per 32B register.
*/
const unsigned grf = urb_start + inst->src[i].nr / 2 * max_polygons;
assert(inst->src[i].offset / param_width < REG_SIZE / 2);
const unsigned delta = (inst->src[i].nr % 2) * (REG_SIZE / 2) +
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
inst->src[i].offset % chan_sz;
reg = byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
delta);
}
if (max_polygons > 1) {
assert(devinfo->ver == 12);

View file

@ -4237,9 +4237,14 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
retype(s.per_primitive_reg(bld, base, comp + i), dest.type));
}
} else {
/* Gfx20+ packs the plane parameters of a single logical
* input in a vec3 format instead of the previously used vec4
* format.
*/
const unsigned k = devinfo->ver >= 20 ? 0 : 3;
for (unsigned int i = 0; i < num_components; i++) {
bld.MOV(offset(dest, bld, i),
retype(s.interp_reg(bld, base, comp + i, 3), dest.type));
retype(s.interp_reg(bld, base, comp + i, k), dest.type));
}
}
break;
@ -4251,9 +4256,21 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
const unsigned base = nir_intrinsic_base(instr);
const unsigned comp = nir_intrinsic_component(instr);
dest.type = BRW_REGISTER_TYPE_F;
bld.MOV(offset(dest, bld, 0), s.interp_reg(bld, base, comp, 3));
bld.MOV(offset(dest, bld, 1), s.interp_reg(bld, base, comp, 1));
bld.MOV(offset(dest, bld, 2), s.interp_reg(bld, base, comp, 0));
/* Gfx20+ packs the plane parameters of a single logical
* input in a vec3 format instead of the previously used vec4
* format.
*/
if (devinfo->ver >= 20) {
bld.MOV(offset(dest, bld, 0), s.interp_reg(bld, base, comp, 0));
bld.MOV(offset(dest, bld, 1), s.interp_reg(bld, base, comp, 2));
bld.MOV(offset(dest, bld, 2), s.interp_reg(bld, base, comp, 1));
} else {
bld.MOV(offset(dest, bld, 0), s.interp_reg(bld, base, comp, 3));
bld.MOV(offset(dest, bld, 1), s.interp_reg(bld, base, comp, 1));
bld.MOV(offset(dest, bld, 2), s.interp_reg(bld, base, comp, 0));
}
break;
}