brw: Use LSC extended descriptor offsets for Xe2 URB messages
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

URB messages on Xe2 are LSC messages with FLAT addressing.  We can
specify an S19 immediate offset in the extended message descriptor,
which should be more than adequate to hold any offsets we need.

We wrote the original URB code before implementing that, and never
doubled back to take advantage of it.  But doing so can drop ADDs
near every URB access.

fossil-db results on Battlemage:

   Totals:
   Instrs: 232239759 -> 231432254 (-0.35%)
   Cycle count: 34044435848.0 -> 34055507100.0 (+0.03%); split: -0.00%, +0.04%
   Spill count: 520370 -> 520362 (-0.00%); split: -0.00%, +0.00%
   Fill count: 470790 -> 470803 (+0.00%); split: -0.00%, +0.00%
   Max live registers: 72111853 -> 72111369 (-0.00%); split: -0.00%, +0.00%

   Totals from 227920 (28.89% of 788851) affected shaders:
   Instrs: 59841897 -> 59034392 (-1.35%)
   Cycle count: 683385208.0 -> 694456460.0 (+1.62%); split: -0.14%, +1.76%
   Spill count: 17278 -> 17270 (-0.05%); split: -0.10%, +0.06%
   Fill count: 17481 -> 17494 (+0.07%); split: -0.03%, +0.10%
   Max live registers: 23052652 -> 23052168 (-0.00%); split: -0.00%, +0.00%

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38899>
This commit is contained in:
Kenneth Graunke 2025-12-09 03:35:12 -08:00 committed by Marge Bot
parent 9482d392a1
commit bdacab49c7

View file

@ -32,6 +32,11 @@
#include "util/bitpack_helpers.h"
/* Forward declaration so the Xe2 URB lowering code below can call this
 * helper.  Those callers pass an empty brw_reg() as the surface and the
 * URB instruction's offset as base_offset.
 */
static void
setup_lsc_surface_descriptors(const brw_builder &bld, brw_send_inst *send,
uint32_t desc, const brw_reg &surface,
int32_t base_offset);
static inline brw_send_inst *
brw_transform_inst_to_send(const brw_builder &bld, brw_inst *inst)
{
@ -91,6 +96,7 @@ lower_urb_read_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb)
/* Get the logical send arguments. */
const brw_reg handle = urb->src[URB_LOGICAL_SRC_HANDLE];
const unsigned offset = urb->offset;
/* Calculate the total number of components of the payload. */
const unsigned dst_comps = urb->size_written / (REG_SIZE * reg_unit(devinfo));
@ -99,14 +105,6 @@ lower_urb_read_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb)
bld.MOV(payload, handle);
/* The low 24-bits of the URB handle is a byte offset into the URB area.
* Add the byte offset of the write to this value.
*/
if (urb->offset) {
bld.ADD(payload, payload, brw_imm_ud(urb->offset));
urb->offset = 0;
}
brw_reg offsets = urb->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
if (offsets.file != BAD_FILE) {
bld.ADD(payload, payload, offsets);
@ -132,8 +130,8 @@ lower_urb_read_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb)
send->has_side_effects = true;
send->is_volatile = false;
send->src[SEND_SRC_DESC] = brw_imm_ud(0);
send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
setup_lsc_surface_descriptors(bld, send, send->desc, brw_reg(), offset);
send->src[SEND_SRC_PAYLOAD1] = payload;
send->src[SEND_SRC_PAYLOAD2] = brw_reg();
}
@ -211,6 +209,7 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb)
const brw_reg src = urb->components_read(URB_LOGICAL_SRC_DATA) ?
urb->src[URB_LOGICAL_SRC_DATA] : brw_reg(brw_imm_ud(0));
assert(brw_type_size_bytes(src.type) == 4);
const unsigned offset = urb->offset;
/* Calculate the total number of components of the payload. */
const unsigned src_comps = MAX2(1, urb->components_read(URB_LOGICAL_SRC_DATA));
@ -220,14 +219,6 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb)
bld.MOV(payload, handle);
/* The low 24-bits of the URB handle is a byte offset into the URB area.
* Add the byte offset of the write to this value.
*/
if (urb->offset) {
bld.ADD(payload, payload, brw_imm_ud(urb->offset));
urb->offset = 0;
}
brw_reg offsets = urb->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
if (offsets.file != BAD_FILE) {
bld.ADD(payload, payload, offsets);
@ -262,6 +253,7 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb)
false /* transpose */,
LSC_CACHE(devinfo, STORE, L1UC_L3UC));
setup_lsc_surface_descriptors(bld, send, send->desc, brw_reg(), offset);
send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, send->exec_size);
send->ex_mlen = ex_mlen;
@ -270,7 +262,6 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb)
send->is_volatile = false;
send->src[SEND_SRC_DESC] = desc;
send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
send->src[SEND_SRC_PAYLOAD1] = payload;
send->src[SEND_SRC_PAYLOAD2] = payload2;
}