From 99ce8b5a071e12e8832823ec250f125585493ae6 Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Tue, 21 May 2024 12:56:50 -0700 Subject: [PATCH] intel/compiler: Add indirect mov lowering pass Indirect addressing(vx1 and vxh) not supported with UB/B datatype for src0, so we need to change the data type for both dest and src0. This fixes following tests cases on Xe2+ - dEQP-VK.spirv_assembly.instruction.compute.8bit_storage.push_constant_8_to_16* - dEQP-VK.spirv_assembly.instruction.compute.8bit_storage.push_constant_8_to_32* Signed-off-by: Sagar Ghuge Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_fs.h | 1 + src/intel/compiler/brw_fs_lower.cpp | 69 +++++++++++++++++++++++++++++ src/intel/compiler/brw_fs_opt.cpp | 2 + 3 files changed, 72 insertions(+) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index ef9bc09ec5e..072f2cf0301 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -661,6 +661,7 @@ bool brw_fs_lower_dpas(fs_visitor &s); bool brw_fs_lower_find_live_channel(fs_visitor &s); bool brw_fs_lower_integer_multiplication(fs_visitor &s); bool brw_fs_lower_load_subgroup_invocation(fs_visitor &s); +bool brw_fs_lower_indirect_mov(fs_visitor &s); bool brw_fs_lower_logical_sends(fs_visitor &s); bool brw_fs_lower_pack(fs_visitor &s); bool brw_fs_lower_load_payload(fs_visitor &s); diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp index 7cfeadd39be..2c36093e8a4 100644 --- a/src/intel/compiler/brw_fs_lower.cpp +++ b/src/intel/compiler/brw_fs_lower.cpp @@ -828,3 +828,72 @@ brw_fs_lower_load_subgroup_invocation(fs_visitor &s) return progress; } + +bool +brw_fs_lower_indirect_mov(fs_visitor &s) +{ + bool progress = false; + + if (s.devinfo->ver < 20) + return progress; + + foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) { + if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT) { + if (brw_type_size_bytes(inst->src[0].type) > 1 && + brw_type_size_bytes(inst->dst.type) > 1) 
{ + continue; + } + + assert(brw_type_size_bytes(inst->src[0].type) == + brw_type_size_bytes(inst->dst.type)); + + const fs_builder ibld(&s, block, inst); + + /* Odd-byte part of the src0 offset (0 or 1), added to the indirect offset */ + uint16_t extra_offset = inst->src[0].offset & 0x1; + fs_reg offset = ibld.ADD(inst->src[1], brw_imm_uw(extra_offset)); + + /* Record whether the combined byte offset is odd, so that either the high + * or the low byte of the 16-bit result can be selected below. + */ + fs_reg is_odd = ibld.AND(offset, brw_imm_ud(1)); + + /* Clear bit 0 so that the offset is word (2-byte) aligned */ + offset = ibld.AND(offset, brw_imm_uw(~1)); + + /* Indirect addressing (vx1 and vxh) is not supported with the UB/B data + * types for src0, so use UW for both src0 and dst instead. + */ + fs_reg dst = ibld.vgrf(BRW_TYPE_UW); + + /* Subtract the unaligned part from the src0 offset, since it has already + * been accounted for in the indirect byte offset. + */ + fs_reg start = retype(inst->src[0], BRW_TYPE_UW); + start.offset &= ~extra_offset; + + /* Grow the length by the extra offset; the length must be an immediate. */ + assert(inst->src[2].file == IMM); + fs_reg length = brw_imm_ud(inst->src[2].ud + extra_offset); + + ibld.emit(SHADER_OPCODE_MOV_INDIRECT, dst, start, offset, length); + + /* Select the high byte if the offset is odd, otherwise the low byte. 
*/ + fs_reg lo = ibld.AND(dst, brw_imm_uw(0xff)); + fs_reg hi = ibld.SHR(dst, brw_imm_uw(8)); + fs_reg result = ibld.vgrf(BRW_TYPE_UW); + ibld.CSEL(result, hi, lo, is_odd, BRW_CONDITIONAL_NZ); + + /* Extra MOV to convert the UW result back to the byte-sized dst type */ + ibld.MOV(inst->dst, result); + + inst->remove(block); + progress = true; + } + } + + if (progress) + s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); + + return progress; +} diff --git a/src/intel/compiler/brw_fs_opt.cpp b/src/intel/compiler/brw_fs_opt.cpp index 7705595f3b5..9c4f37b4c4e 100644 --- a/src/intel/compiler/brw_fs_opt.cpp +++ b/src/intel/compiler/brw_fs_opt.cpp @@ -162,6 +162,8 @@ brw_fs_optimize(fs_visitor &s) OPT(brw_fs_lower_uniform_pull_constant_loads); + OPT(brw_fs_lower_indirect_mov); + OPT(brw_fs_lower_find_live_channel); OPT(brw_fs_lower_load_subgroup_invocation);