mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 07:08:04 +02:00
brw: move URB channel mask shifting to the lowering pass
For example Xe2 uses the LSC and doesn't need the shifting, so let's just apply it where it's needed. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36757>
This commit is contained in:
parent
99cf8273f6
commit
c871a62a75
4 changed files with 28 additions and 22 deletions
|
|
@ -106,7 +106,7 @@ brw_emit_tcs_thread_end(brw_shader &s)
|
|||
*/
|
||||
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X);
|
||||
srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
|
||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
|
||||
brw_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
|
|
|
|||
|
|
@ -2469,9 +2469,7 @@ brw_shader::gs_urb_channel_mask(const brw_reg &dword_index)
|
|||
/* Set the channel masks to 1 << (dword_index % 4), so that we'll
|
||||
* write to the appropriate DWORD within the OWORD.
|
||||
*/
|
||||
brw_reg channel = ubld.AND(dword_index, brw_imm_ud(3u));
|
||||
/* Then the channel masks need to be in bits 23:16. */
|
||||
return ubld.SHL(intexp2(ubld, channel), brw_imm_ud(16u));
|
||||
return intexp2(ubld, ubld.AND(dword_index, brw_imm_ud(3u)));
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -3286,7 +3284,7 @@ brw_from_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb,
|
|||
|
||||
brw_reg mask_reg;
|
||||
if (mask != WRITEMASK_XYZW)
|
||||
mask_reg = brw_imm_ud(mask << 16);
|
||||
mask_reg = brw_imm_ud(mask);
|
||||
|
||||
brw_reg sources[4];
|
||||
|
||||
|
|
@ -5278,7 +5276,7 @@ emit_urb_direct_vec4_write(const brw_builder &bld,
|
|||
|
||||
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
|
||||
srcs[URB_LOGICAL_SRC_DATA] =
|
||||
retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
|
||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length);
|
||||
|
|
@ -5348,7 +5346,7 @@ emit_urb_direct_vec4_write_xe2(const brw_builder &bld,
|
|||
|
||||
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
|
||||
srcs[URB_LOGICAL_SRC_DATA] =
|
||||
retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
|
||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
|
||||
|
|
@ -5411,7 +5409,7 @@ emit_urb_indirect_vec4_write(const brw_builder &bld,
|
|||
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
|
||||
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
|
||||
srcs[URB_LOGICAL_SRC_DATA] =
|
||||
retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
|
||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length);
|
||||
|
|
@ -5482,7 +5480,7 @@ emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
|
|||
|
||||
brw_reg srcs[URB_LOGICAL_NUM_SRCS];
|
||||
srcs[URB_LOGICAL_SRC_HANDLE] = addr;
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
|
||||
srcs[URB_LOGICAL_SRC_DATA] =
|
||||
retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
|
||||
srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
|
||||
|
|
@ -5529,8 +5527,7 @@ emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr,
|
|||
bld8.ADD(quarter(retype(offset_src, BRW_TYPE_UD), q),
|
||||
brw_imm_ud(c + base_in_dwords));
|
||||
brw_reg m = bld8.AND(off, brw_imm_ud(0x3));
|
||||
brw_reg t = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m);
|
||||
brw_reg mask = bld8.SHL(t, brw_imm_ud(16));
|
||||
brw_reg mask = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m);
|
||||
brw_reg final_offset = bld8.SHR(off, brw_imm_ud(2));
|
||||
|
||||
brw_reg payload_srcs[4];
|
||||
|
|
|
|||
|
|
@ -154,8 +154,13 @@ lower_urb_write_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
if (per_slot_present)
|
||||
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS];
|
||||
|
||||
if (channel_mask_present)
|
||||
payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
|
||||
if (channel_mask_present) {
|
||||
payload_sources[header_size++] =
|
||||
inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].file == IMM ?
|
||||
brw_imm_ud(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].ud << 16) :
|
||||
bld.SHL(retype(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK], BRW_TYPE_UD),
|
||||
brw_imm_ud(16));
|
||||
}
|
||||
|
||||
for (unsigned i = header_size, j = 0; i < length; i++, j++)
|
||||
payload_sources[i] = offset(inst->src[URB_LOGICAL_SRC_DATA], bld, j);
|
||||
|
|
@ -221,13 +226,17 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst)
|
|||
bld.ADD(payload, payload, offsets);
|
||||
}
|
||||
|
||||
const brw_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
|
||||
unsigned mask = 0;
|
||||
unsigned num_channels_or_cmask = src_comps;
|
||||
|
||||
if (cmask.file != BAD_FILE) {
|
||||
assert(cmask.file == IMM);
|
||||
const brw_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK];
|
||||
brw_reg desc = brw_imm_ud(0);
|
||||
if (cmask.file == IMM) {
|
||||
assert(cmask.type == BRW_TYPE_UD);
|
||||
mask = cmask.ud >> 16;
|
||||
num_channels_or_cmask = cmask.ud;
|
||||
} else if (cmask.file != BAD_FILE) {
|
||||
const brw_builder &ubld = bld.exec_all().group(8, 0);
|
||||
desc = component(ubld.SHL(retype(cmask, BRW_TYPE_UD), brw_imm_ud(12)), 0);
|
||||
num_channels_or_cmask = 0;
|
||||
}
|
||||
|
||||
brw_reg payload2 = bld.move_to_vgrf(src, src_comps);
|
||||
|
|
@ -235,11 +244,11 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst)
|
|||
|
||||
inst->sfid = BRW_SFID_URB;
|
||||
|
||||
enum lsc_opcode op = mask ? LSC_OP_STORE_CMASK : LSC_OP_STORE;
|
||||
enum lsc_opcode op = cmask.file != BAD_FILE ? LSC_OP_STORE_CMASK : LSC_OP_STORE;
|
||||
inst->desc = lsc_msg_desc(devinfo, op,
|
||||
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32,
|
||||
LSC_DATA_SIZE_D32,
|
||||
mask ? mask : src_comps /* num_channels */,
|
||||
num_channels_or_cmask,
|
||||
false /* transpose */,
|
||||
LSC_CACHE(devinfo, STORE, L1UC_L3UC));
|
||||
|
||||
|
|
@ -254,7 +263,7 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst)
|
|||
|
||||
inst->resize_sources(SEND_NUM_SRCS);
|
||||
|
||||
inst->src[SEND_SRC_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_DESC] = desc;
|
||||
inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0);
|
||||
inst->src[SEND_SRC_PAYLOAD1] = payload;
|
||||
inst->src[SEND_SRC_PAYLOAD2] = payload2;
|
||||
|
|
|
|||
|
|
@ -330,7 +330,7 @@ brw_shader::emit_urb_writes(const brw_reg &gs_vertex_count)
|
|||
* 4 slots data. All are explicitly zeros in order to keep the MBZ
|
||||
* area written as zeros.
|
||||
*/
|
||||
bld.exec_all().MOV(uniform_mask, brw_imm_ud(0x10000u));
|
||||
bld.exec_all().MOV(uniform_mask, brw_imm_ud(0x1u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 0), brw_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0u));
|
||||
bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue