From 437bda3013d0fb137e1576199fe74ee09ec85d2b Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 5 Aug 2024 21:05:07 -0700 Subject: [PATCH] intel/brw: Get rid of the lsc_msg_desc_wcmask helper The LOAD/STORE opcodes take a vector size, while the LOAD/STORE_CMASK opcodes take a channel mask. The two are mutually exclusive. So we can just have the lsc_msg_desc() helper take one or the other in the same parameter. This more closely matches the actual descriptor. We couldn't do this until the previous commit, since we were previously relying on the lsc_msg_desc() function to calculate a cmask out of the number of vector components. But now we don't need it to do that. Reviewed-by: Lionel Landwerlin Reviewed-by: Sagar Ghuge Part-of: --- src/intel/compiler/brw_eu.h | 22 +++---------- .../compiler/brw_lower_logical_sends.cpp | 32 +++++++++---------- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index baa4870b3d6..11de4e22633 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1136,12 +1136,12 @@ lsc_vect_size(unsigned vect_size) } static inline uint32_t -lsc_msg_desc_wcmask(const struct intel_device_info *devinfo, +lsc_msg_desc(const struct intel_device_info *devinfo, enum lsc_opcode opcode, enum lsc_addr_surface_type addr_type, enum lsc_addr_size addr_sz, - enum lsc_data_size data_sz, unsigned num_channels, - bool transpose, unsigned cache_ctrl, unsigned cmask) + enum lsc_data_size data_sz, unsigned num_channels_or_cmask, + bool transpose, unsigned cache_ctrl) { assert(devinfo->has_lsc); assert(!transpose || lsc_opcode_has_transpose(opcode)); @@ -1156,25 +1156,13 @@ lsc_msg_desc_wcmask(const struct intel_device_info *devinfo, SET_BITS(addr_type, 30, 29); if (lsc_opcode_has_cmask(opcode)) - msg_desc |= SET_BITS(cmask, 15, 12); + msg_desc |= SET_BITS(num_channels_or_cmask, 15, 12); else - msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12); + msg_desc |= SET_BITS(lsc_vect_size(num_channels_or_cmask), 14, 12); return msg_desc; } -static inline uint32_t -lsc_msg_desc(UNUSED const struct intel_device_info *devinfo, - enum lsc_opcode opcode, - enum lsc_addr_surface_type addr_type, - enum lsc_addr_size addr_sz, - enum lsc_data_size data_sz, unsigned num_channels, - bool transpose, unsigned cache_ctrl) -{ - return lsc_msg_desc_wcmask(devinfo, opcode, addr_type, addr_sz, - data_sz, num_channels, transpose, cache_ctrl, 0); -} - static inline enum lsc_opcode lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo, uint32_t desc) diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 301f408a442..3e6a7e1ba02 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -238,12 +238,12 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst) inst->sfid = BRW_SFID_URB; enum lsc_opcode op = mask ? LSC_OP_STORE_CMASK : LSC_OP_STORE; - inst->desc = lsc_msg_desc_wcmask(devinfo, op, + inst->desc = lsc_msg_desc(devinfo, op, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32, - LSC_DATA_SIZE_D32, src_comps /* num_channels */, + LSC_DATA_SIZE_D32, + mask ? mask : src_comps /* num_channels */, false /* transpose */, - LSC_CACHE(devinfo, STORE, L1UC_L3UC), - mask); + LSC_CACHE(devinfo, STORE, L1UC_L3UC)); /* Update the original instruction. */ @@ -1743,12 +1743,12 @@ lower_lsc_surface_logical_send(bblock_t *block, const fs_builder &bld, switch (inst->opcode) { case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: num_components = arg.ud; - inst->desc = lsc_msg_desc_wcmask(devinfo, LSC_OP_LOAD_CMASK, - surf_type, LSC_ADDR_SIZE_A32, - LSC_DATA_SIZE_D32, num_components, - false /* transpose */, - LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), - BITSET_MASK(num_components)); + inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, + surf_type, LSC_ADDR_SIZE_A32, + LSC_DATA_SIZE_D32, + BITSET_MASK(num_components), + false /* transpose */, + LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS)); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: num_components = arg.ud; @@ -1760,12 +1760,12 @@ lower_lsc_surface_logical_send(bblock_t *block, const fs_builder &bld, break; case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: num_components = arg.ud; - inst->desc = lsc_msg_desc_wcmask(devinfo, LSC_OP_STORE_CMASK, - surf_type, LSC_ADDR_SIZE_A32, - LSC_DATA_SIZE_D32, num_components, - false /* transpose */, - LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS), - BITSET_MASK(num_components)); + inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE_CMASK, + surf_type, LSC_ADDR_SIZE_A32, + LSC_DATA_SIZE_D32, + BITSET_MASK(num_components), + false /* transpose */, + LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS)); break; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: num_components = arg.ud;