From bdacab49c7b965db35aac705068f7937afe64e6a Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 9 Dec 2025 03:35:12 -0800 Subject: [PATCH] brw: Use LSC extended descriptor offsets for Xe2 URB messages URB messages on Xe2 are LSC messages with FLAT addressing. We can specify an S19 immediate offset in the extended message descriptor, which should be more than adequate to hold any offsets we need. We wrote the original URB code before implementing that, and never doubled back to take advantage of it. But doing so can drop ADDs near every URB access. fossil-db results on Battlemage: Totals: Instrs: 232239759 -> 231432254 (-0.35%) Cycle count: 34044435848.0 -> 34055507100.0 (+0.03%); split: -0.00%, +0.04% Spill count: 520370 -> 520362 (-0.00%); split: -0.00%, +0.00% Fill count: 470790 -> 470803 (+0.00%); split: -0.00%, +0.00% Max live registers: 72111853 -> 72111369 (-0.00%); split: -0.00%, +0.00% Totals from 227920 (28.89% of 788851) affected shaders: Instrs: 59841897 -> 59034392 (-1.35%) Cycle count: 683385208.0 -> 694456460.0 (+1.62%); split: -0.14%, +1.76% Spill count: 17278 -> 17270 (-0.05%); split: -0.10%, +0.06% Fill count: 17481 -> 17494 (+0.07%); split: -0.03%, +0.10% Max live registers: 23052652 -> 23052168 (-0.00%); split: -0.00%, +0.00% Reviewed-by: Lionel Landwerlin Reviewed-by: Rohan Garg Part-of: --- .../compiler/brw/brw_lower_logical_sends.cpp | 29 +++++++------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/src/intel/compiler/brw/brw_lower_logical_sends.cpp b/src/intel/compiler/brw/brw_lower_logical_sends.cpp index a893642e4fb..cfda70917d7 100644 --- a/src/intel/compiler/brw/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw/brw_lower_logical_sends.cpp @@ -32,6 +32,11 @@ #include "util/bitpack_helpers.h" +static void +setup_lsc_surface_descriptors(const brw_builder &bld, brw_send_inst *send, + uint32_t desc, const brw_reg &surface, + int32_t base_offset); + static inline brw_send_inst * 
brw_transform_inst_to_send(const brw_builder &bld, brw_inst *inst) { @@ -91,6 +96,7 @@ lower_urb_read_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb) /* Get the logical send arguments. */ const brw_reg handle = urb->src[URB_LOGICAL_SRC_HANDLE]; + const unsigned offset = urb->offset; /* Calculate the total number of components of the payload. */ const unsigned dst_comps = urb->size_written / (REG_SIZE * reg_unit(devinfo)); @@ -99,14 +105,6 @@ lower_urb_read_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb) bld.MOV(payload, handle); - /* The low 24-bits of the URB handle is a byte offset into the URB area. - * Add the byte offset of the write to this value. - */ - if (urb->offset) { - bld.ADD(payload, payload, brw_imm_ud(urb->offset)); - urb->offset = 0; - } - brw_reg offsets = urb->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS]; if (offsets.file != BAD_FILE) { bld.ADD(payload, payload, offsets); @@ -132,8 +130,8 @@ lower_urb_read_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb) send->has_side_effects = true; send->is_volatile = false; - send->src[SEND_SRC_DESC] = brw_imm_ud(0); - send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); + setup_lsc_surface_descriptors(bld, send, send->desc, brw_reg(), offset); + send->src[SEND_SRC_PAYLOAD1] = payload; send->src[SEND_SRC_PAYLOAD2] = brw_reg(); } @@ -211,6 +209,7 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb) const brw_reg src = urb->components_read(URB_LOGICAL_SRC_DATA) ? urb->src[URB_LOGICAL_SRC_DATA] : brw_reg(brw_imm_ud(0)); assert(brw_type_size_bytes(src.type) == 4); + const unsigned offset = urb->offset; /* Calculate the total number of components of the payload. */ const unsigned src_comps = MAX2(1, urb->components_read(URB_LOGICAL_SRC_DATA)); @@ -220,14 +219,6 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb) bld.MOV(payload, handle); - /* The low 24-bits of the URB handle is a byte offset into the URB area. 
- * Add the byte offset of the write to this value. - */ - if (urb->offset) { - bld.ADD(payload, payload, brw_imm_ud(urb->offset)); - urb->offset = 0; - } - brw_reg offsets = urb->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS]; if (offsets.file != BAD_FILE) { bld.ADD(payload, payload, offsets); @@ -262,6 +253,7 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb) false /* transpose */, LSC_CACHE(devinfo, STORE, L1UC_L3UC)); + setup_lsc_surface_descriptors(bld, send, send->desc, brw_reg(), offset); send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, send->exec_size); send->ex_mlen = ex_mlen; @@ -270,7 +262,6 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_urb_inst *urb) send->is_volatile = false; send->src[SEND_SRC_DESC] = desc; - send->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); send->src[SEND_SRC_PAYLOAD1] = payload; send->src[SEND_SRC_PAYLOAD2] = payload2; }