brw: move URB channel mask shifting to the lowering pass

For example Xe2 uses the LSC and doesn't need the shifting, so let's
just apply it where it's needed.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36757>
This commit is contained in:
Lionel Landwerlin 2024-08-22 12:21:19 +03:00 committed by Marge Bot
parent 99cf8273f6
commit c871a62a75
4 changed files with 28 additions and 22 deletions

View file

@ -106,7 +106,7 @@ brw_emit_tcs_thread_end(brw_shader &s)
*/
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X);
srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
brw_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,

View file

@ -2469,9 +2469,7 @@ brw_shader::gs_urb_channel_mask(const brw_reg &dword_index)
/* Set the channel masks to 1 << (dword_index % 4), so that we'll
* write to the appropriate DWORD within the OWORD.
*/
brw_reg channel = ubld.AND(dword_index, brw_imm_ud(3u));
/* Then the channel masks need to be in bits 23:16. */
return ubld.SHL(intexp2(ubld, channel), brw_imm_ud(16u));
return intexp2(ubld, ubld.AND(dword_index, brw_imm_ud(3u)));
}
void
@ -3286,7 +3284,7 @@ brw_from_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb,
brw_reg mask_reg;
if (mask != WRITEMASK_XYZW)
mask_reg = brw_imm_ud(mask << 16);
mask_reg = brw_imm_ud(mask);
brw_reg sources[4];
@ -5278,7 +5276,7 @@ emit_urb_direct_vec4_write(const brw_builder &bld,
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
srcs[URB_LOGICAL_SRC_DATA] =
retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length);
@ -5348,7 +5346,7 @@ emit_urb_direct_vec4_write_xe2(const brw_builder &bld,
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
srcs[URB_LOGICAL_SRC_DATA] =
retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
@ -5411,7 +5409,7 @@ emit_urb_indirect_vec4_write(const brw_builder &bld,
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
srcs[URB_LOGICAL_SRC_DATA] =
retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length);
@ -5482,7 +5480,7 @@ emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
srcs[URB_LOGICAL_SRC_HANDLE] = addr;
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
srcs[URB_LOGICAL_SRC_DATA] =
retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
@ -5529,8 +5527,7 @@ emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr,
bld8.ADD(quarter(retype(offset_src, BRW_TYPE_UD), q),
brw_imm_ud(c + base_in_dwords));
brw_reg m = bld8.AND(off, brw_imm_ud(0x3));
brw_reg t = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m);
brw_reg mask = bld8.SHL(t, brw_imm_ud(16));
brw_reg mask = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m);
brw_reg final_offset = bld8.SHR(off, brw_imm_ud(2));
brw_reg payload_srcs[4];

View file

@ -154,8 +154,13 @@ lower_urb_write_logical_send(const brw_builder &bld, brw_inst *inst)
if (per_slot_present)
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
if (channel_mask_present)
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
if (channel_mask_present) {
payload_sources[header_size++] =
inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].file == IMM ?
brw_imm_ud(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].ud << 16) :
bld.SHL(retype(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK], BRW_TYPE_UD),
brw_imm_ud(16));
}
for (unsigned i = header_size, j = 0; i < length; i++, j++)
payload_sources[i] = offset(inst->src[URB_LOGICAL_SRC_DATA], bld, j);
@ -221,13 +226,17 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst)
bld.ADD(payload, payload, offsets);
}
const brw_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
unsigned mask = 0;
unsigned num_channels_or_cmask = src_comps;
if (cmask.file != BAD_FILE) {
assert(cmask.file == IMM);
const brw_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
brw_reg desc = brw_imm_ud(0);
if (cmask.file == IMM) {
assert(cmask.type == BRW_TYPE_UD);
mask = cmask.ud >> 16;
num_channels_or_cmask = cmask.ud;
} else if (cmask.file != BAD_FILE) {
const brw_builder &ubld = bld.exec_all().group(8, 0);
desc = component(ubld.SHL(retype(cmask, BRW_TYPE_UD), brw_imm_ud(12)), 0);
num_channels_or_cmask = 0;
}
brw_reg payload2 = bld.move_to_vgrf(src, src_comps);
@ -235,11 +244,11 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst)
inst->sfid = BRW_SFID_URB;
enum lsc_opcode op = mask ? LSC_OP_STORE_CMASK : LSC_OP_STORE;
enum lsc_opcode op = cmask.file != BAD_FILE ? LSC_OP_STORE_CMASK : LSC_OP_STORE;
inst->desc = lsc_msg_desc(devinfo, op,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32,
LSC_DATA_SIZE_D32,
mask ? mask : src_comps /* num_channels */,
num_channels_or_cmask,
false /* transpose */,
LSC_CACHE(devinfo, STORE, L1UC_L3UC));
@ -254,7 +263,7 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst)
inst->resize_sources(SEND_NUM_SRCS);
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
inst->src[SEND_SRC_DESC] = desc;
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
inst->src[SEND_SRC_PAYLOAD1] = payload;
inst->src[SEND_SRC_PAYLOAD2] = payload2;

View file

@ -330,7 +330,7 @@ brw_shader::emit_urb_writes(const brw_reg &gs_vertex_count)
* 4 slots data. All are explicitly zeros in order to keep the MBZ
* area written as zeros.
*/
bld.exec_all().MOV(uniform_mask, brw_imm_ud(0x10000u));
bld.exec_all().MOV(uniform_mask, brw_imm_ud(0x1u));
bld.exec_all().MOV(offset(payload, bld, 0), brw_imm_ud(0u));
bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0u));
bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));