diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index 05e9fd46c7f..d00b4990755 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -536,6 +536,8 @@ enum opcode {
 
    SHADER_OPCODE_READ_ARCH_REG,
 
+   SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION,
+
    RT_OPCODE_TRACE_RAY_LOGICAL,
 };
 
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index c4126d694cf..6ca33a4e313 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2464,6 +2464,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
       return "btd_retire_logical";
    case SHADER_OPCODE_READ_ARCH_REG:
       return "read_arch_reg";
+   case SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION:
+      return "load_subgroup_invocation";
    }
 
    unreachable("not reached");
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index eb92c84da70..dde29f8a284 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -599,6 +599,7 @@ bool brw_fs_lower_derivatives(fs_visitor &s);
 bool brw_fs_lower_dpas(fs_visitor &s);
 bool brw_fs_lower_find_live_channel(fs_visitor &s);
 bool brw_fs_lower_integer_multiplication(fs_visitor &s);
+bool brw_fs_lower_load_subgroup_invocation(fs_visitor &s);
 bool brw_fs_lower_logical_sends(fs_visitor &s);
 bool brw_fs_lower_pack(fs_visitor &s);
 bool brw_fs_lower_load_payload(fs_visitor &s);
diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp
index cdab24904ea..d6d4da8e3fc 100644
--- a/src/intel/compiler/brw_fs_lower.cpp
+++ b/src/intel/compiler/brw_fs_lower.cpp
@@ -703,3 +703,68 @@ brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s)
    s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
                          DEPENDENCY_VARIABLES);
 }
+
+/**
+ * Lower SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION to the MOV/ADD sequence that
+ * fills its destination with one index per channel.
+ *
+ * An 8-wide instruction must have a UD destination: the packed immediate
+ * 0x76543210 is written through a UW view of the destination and then moved
+ * into the UD destination.  Wider instructions must have a UW destination:
+ * channels 0-7 take the same immediate, channels 8-15 are channels 0-7 plus
+ * 8 and, above 16 channels, channels 16-31 are channels 0-15 plus 16.
+ * (Assumes brw_imm_v(0x76543210) expands to the values 0..7 -- confirm.)
+ *
+ * The MOV/ADD instructions are emitted with exec_all() through a builder
+ * positioned at the original instruction, which is then removed.
+ *
+ * \return true if an instruction was lowered.
+ */
+bool
+brw_fs_lower_load_subgroup_invocation(fs_visitor &s)
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
+      if (inst->opcode != SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION)
+         continue;
+
+      const fs_builder abld =
+         fs_builder(&s, block, inst).annotate("SubgroupInvocation", NULL);
+      const fs_builder ubld8 = abld.group(8, 0).exec_all();
+
+      if (inst->exec_size == 8) {
+         assert(inst->dst.type == BRW_TYPE_UD);
+         fs_reg uw = retype(inst->dst, BRW_TYPE_UW);
+         ubld8.MOV(uw, brw_imm_v(0x76543210));
+         ubld8.MOV(inst->dst, uw);
+      } else {
+         assert(inst->dst.type == BRW_TYPE_UW);
+         abld.UNDEF(inst->dst);
+         ubld8.MOV(inst->dst, brw_imm_v(0x76543210));
+         ubld8.ADD(byte_offset(inst->dst, 16), inst->dst, brw_imm_uw(8u));
+         if (inst->exec_size > 16) {
+            const fs_builder ubld16 = abld.group(16, 0).exec_all();
+            ubld16.ADD(byte_offset(inst->dst, 32), inst->dst, brw_imm_uw(16u));
+         }
+      }
+
+      inst->remove(block);
+      progress = true;
+
+      /* Currently this is only ever emitted once, so there's no point in
+       * continuing to look for more cases. Drop if we ever re-emit it.
+       *
+       * NOTE(review): foreach_block_and_inst_safe is defined outside this
+       * patch; if it expands to nested block/instruction loops, this break
+       * only leaves the instruction loop and later blocks are still
+       * visited -- confirm.
+       */
+      break;
+   }
+
+   if (progress)
+      s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
+
+   return progress;
+}
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 07db93600a0..371ae4d2d8b 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -379,19 +379,9 @@ fs_nir_emit_system_values(nir_to_brw_state &ntb)
     * never end up using it.
     */
    {
-      const fs_builder abld = bld.annotate("gl_SubgroupInvocation", NULL);
       fs_reg &reg = ntb.system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION];
-      reg = abld.vgrf(BRW_TYPE_UW);
-      abld.UNDEF(reg);
-
-      const fs_builder allbld8 = abld.group(8, 0).exec_all();
-      allbld8.MOV(reg, brw_imm_v(0x76543210));
-      if (s.dispatch_width > 8)
-         allbld8.ADD(byte_offset(reg, 16), reg, brw_imm_uw(8u));
-      if (s.dispatch_width > 16) {
-         const fs_builder allbld16 = abld.group(16, 0).exec_all();
-         allbld16.ADD(byte_offset(reg, 32), reg, brw_imm_uw(16u));
-      }
+      reg = bld.vgrf(s.dispatch_width < 16 ? BRW_TYPE_UD : BRW_TYPE_UW);
+      bld.emit(SHADER_OPCODE_LOAD_SUBGROUP_INVOCATION, reg);
    }
 
    nir_function_impl *impl = nir_shader_get_entrypoint((nir_shader *)s.nir);
diff --git a/src/intel/compiler/brw_fs_opt.cpp b/src/intel/compiler/brw_fs_opt.cpp
index 66e6a3b7057..fb0e0b99c8d 100644
--- a/src/intel/compiler/brw_fs_opt.cpp
+++ b/src/intel/compiler/brw_fs_opt.cpp
@@ -155,6 +155,8 @@ brw_fs_optimize(fs_visitor &s)
    OPT(brw_fs_lower_uniform_pull_constant_loads);
 
    OPT(brw_fs_lower_find_live_channel);
+
+   OPT(brw_fs_lower_load_subgroup_invocation);
 }
 
 static unsigned