diff --git a/src/intel/compiler/brw_compile_tcs.cpp b/src/intel/compiler/brw_compile_tcs.cpp index 10163a37833..7ef7f55555a 100644 --- a/src/intel/compiler/brw_compile_tcs.cpp +++ b/src/intel/compiler/brw_compile_tcs.cpp @@ -106,7 +106,7 @@ brw_emit_tcs_thread_end(brw_shader &s) */ brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16); + srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X); srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1); brw_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index c8bb547fd1d..38e9364e555 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -2469,9 +2469,7 @@ brw_shader::gs_urb_channel_mask(const brw_reg &dword_index) /* Set the channel masks to 1 << (dword_index % 4), so that we'll * write to the appropriate DWORD within the OWORD. */ - brw_reg channel = ubld.AND(dword_index, brw_imm_ud(3u)); - /* Then the channel masks need to be in bits 23:16. */ - return ubld.SHL(intexp2(ubld, channel), brw_imm_ud(16u)); + return intexp2(ubld, ubld.AND(dword_index, brw_imm_ud(3u))); } void @@ -3286,7 +3284,7 @@ brw_from_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb, brw_reg mask_reg; if (mask != WRITEMASK_XYZW) - mask_reg = brw_imm_ud(mask << 16); + mask_reg = brw_imm_ud(mask); brw_reg sources[4]; @@ -5278,7 +5276,7 @@ emit_urb_direct_vec4_write(const brw_builder &bld, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); + srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask); srcs[URB_LOGICAL_SRC_DATA] = retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); @@ -5348,7 +5346,7 @@ emit_urb_direct_vec4_write_xe2(const brw_builder &bld, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); + srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask); srcs[URB_LOGICAL_SRC_DATA] = retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps); @@ -5411,7 +5409,7 @@ emit_urb_indirect_vec4_write(const brw_builder &bld, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); + srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask); srcs[URB_LOGICAL_SRC_DATA] = retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); @@ -5482,7 +5480,7 @@ emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr, brw_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = addr; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); + srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask); srcs[URB_LOGICAL_SRC_DATA] = retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F); srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps); @@ -5529,8 +5527,7 @@ emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr, bld8.ADD(quarter(retype(offset_src, BRW_TYPE_UD), q), brw_imm_ud(c + base_in_dwords)); brw_reg m = bld8.AND(off, brw_imm_ud(0x3)); - brw_reg t = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m); - brw_reg mask = bld8.SHL(t, brw_imm_ud(16)); + brw_reg mask = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m); brw_reg final_offset = bld8.SHR(off, brw_imm_ud(2)); brw_reg payload_srcs[4]; diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index b7d79373129..1429ef83a7f 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -154,8 +154,13 @@ lower_urb_write_logical_send(const brw_builder &bld, brw_inst *inst) if (per_slot_present) payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS]; - if (channel_mask_present) - payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK]; + if (channel_mask_present) { + payload_sources[header_size++] = + inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].file == IMM ? + brw_imm_ud(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].ud << 16) : + bld.SHL(retype(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK], BRW_TYPE_UD), + brw_imm_ud(16)); + } for (unsigned i = header_size, j = 0; i < length; i++, j++) payload_sources[i] = offset(inst->src[URB_LOGICAL_SRC_DATA], bld, j); @@ -221,13 +226,17 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst) bld.ADD(payload, payload, offsets); } - const brw_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK]; - unsigned mask = 0; + unsigned num_channels_or_cmask = src_comps; - if (cmask.file != BAD_FILE) { - assert(cmask.file == IMM); + const brw_reg cmask = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK]; + brw_reg desc = brw_imm_ud(0); + if (cmask.file == IMM) { assert(cmask.type == BRW_TYPE_UD); - mask = cmask.ud >> 16; + num_channels_or_cmask = cmask.ud; + } else if (cmask.file != BAD_FILE) { + const brw_builder &ubld = bld.exec_all().group(8, 0); + desc = component(ubld.SHL(retype(cmask, BRW_TYPE_UD), brw_imm_ud(12)), 0); + num_channels_or_cmask = 0; } brw_reg payload2 = bld.move_to_vgrf(src, src_comps); @@ -235,11 +244,11 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst) inst->sfid = BRW_SFID_URB; - enum lsc_opcode op = mask ? LSC_OP_STORE_CMASK : LSC_OP_STORE; + enum lsc_opcode op = cmask.file != BAD_FILE ? LSC_OP_STORE_CMASK : LSC_OP_STORE; inst->desc = lsc_msg_desc(devinfo, op, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32, LSC_DATA_SIZE_D32, - mask ? mask : src_comps /* num_channels */, + num_channels_or_cmask, false /* transpose */, LSC_CACHE(devinfo, STORE, L1UC_L3UC)); @@ -254,7 +263,7 @@ lower_urb_write_logical_send_xe2(const brw_builder &bld, brw_inst *inst) inst->resize_sources(SEND_NUM_SRCS); - inst->src[SEND_SRC_DESC] = brw_imm_ud(0); + inst->src[SEND_SRC_DESC] = desc; inst->src[SEND_SRC_EX_DESC] = brw_imm_ud(0); inst->src[SEND_SRC_PAYLOAD1] = payload; inst->src[SEND_SRC_PAYLOAD2] = payload2; diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index e2e4b9e2d1e..e63cb19b322 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -330,7 +330,7 @@ brw_shader::emit_urb_writes(const brw_reg &gs_vertex_count) * 4 slots data. All are explicitly zeros in order to to keep the MBZ * area written as zeros. */ - bld.exec_all().MOV(uniform_mask, brw_imm_ud(0x10000u)); + bld.exec_all().MOV(uniform_mask, brw_imm_ud(0x1u)); bld.exec_all().MOV(offset(payload, bld, 0), brw_imm_ud(0u)); bld.exec_all().MOV(offset(payload, bld, 1), brw_imm_ud(0u)); bld.exec_all().MOV(offset(payload, bld, 2), brw_imm_ud(0u));