diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 44f150aa166..5505787af40 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -6544,6 +6544,9 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, get_nir_buffer_intrinsic_index(ntb, bld, instr); } else { srcs[SURFACE_LOGICAL_SRC_SURFACE] = fs_reg(brw_imm_ud(GFX7_BTI_SLM)); + + /* SLM has to use aligned OWord Block Read messages on pre-LSC HW. */ + assert(devinfo->has_lsc || nir_intrinsic_align(instr) >= 16); } const unsigned total_dwords = ALIGN(instr->num_components, diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index e3caebd8e13..c28a40c18bb 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1930,8 +1930,14 @@ lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst) const bool has_side_effects = inst->has_side_effects(); + /* SLM block reads must use the 16B-aligned OWord Block Read messages, + * as the unaligned message doesn't exist for SLM. However, we still + * use SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL in that case + * (to avoid adding more opcodes), but only emit it with 16B alignment. + */ const bool align_16B = - inst->opcode != SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL; + inst->opcode != SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL || + (surface.file == IMM && surface.ud == GFX7_BTI_SLM); const bool write = inst->opcode == SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL; diff --git a/src/intel/compiler/intel_nir_blockify_uniform_loads.c b/src/intel/compiler/intel_nir_blockify_uniform_loads.c index 2ad0a117a34..40dd87ebec6 100644 --- a/src/intel/compiler/intel_nir_blockify_uniform_loads.c +++ b/src/intel/compiler/intel_nir_blockify_uniform_loads.c @@ -69,8 +69,8 @@ intel_nir_blockify_uniform_loads_instr(nir_builder *b, return true; case nir_intrinsic_load_shared: - /* Block loads on shared memory are not supported before the LSC. */ - if (!devinfo->has_lsc) + /* Block loads on shared memory are not supported before Icelake. */ + if (devinfo->ver < 11) return false; if (nir_src_is_divergent(intrin->src[0])) @@ -79,6 +79,14 @@ intel_nir_blockify_uniform_loads_instr(nir_builder *b, if (intrin->def.bit_size != 32) return false; + /* Without the LSC, we have to use OWord Block Load messages (the one + * that requires OWord aligned offsets, too). + */ + if (!devinfo->has_lsc && + (intrin->def.num_components < 4 || + nir_intrinsic_align(intrin) < 16)) + return false; + intrin->intrinsic = nir_intrinsic_load_shared_uniform_block_intel; return true;