diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index 27e2ea33790..759dc3f67a4 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -457,6 +457,21 @@ enum opcode {
     */
    SHADER_OPCODE_QUAD_SWAP,
 
+   /* Read the value held by the first live channel and broadcast it
+    * to all channels.  Lowered by brw_fs_lower_subgroup_ops().
+    *
+    * Source 0: Value to read.
+    */
+   SHADER_OPCODE_READ_FROM_LIVE_CHANNEL,
+
+   /* Read the value held by a specified channel and broadcast it
+    * to all channels.  Lowered by brw_fs_lower_subgroup_ops().
+    *
+    * Source 0: Value to read.
+    * Source 1: Index of the channel to pick it from (may be immediate).
+    */
+   SHADER_OPCODE_READ_FROM_CHANNEL,
+
    /* This turns into an align16 mov from src0 to dst with a swizzle
     * provided as an immediate in src1.
     */
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 8be8f50bc33..6b5f5d4d837 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -334,6 +334,8 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
    case SHADER_OPCODE_VOTE_EQUAL:
    case SHADER_OPCODE_BALLOT:
    case SHADER_OPCODE_QUAD_SWAP:
+   case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
+   case SHADER_OPCODE_READ_FROM_CHANNEL:
       return false;
    default:
       return true;
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 09f6b3c3f8d..1bd3c02599e 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -6633,33 +6633,15 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
       const brw_reg value = get_nir_src(ntb, instr->src[0]);
       const brw_reg invocation = get_nir_src_imm(ntb, instr->src[1]);
 
-      if (invocation.file == IMM) {
-         unsigned i = invocation.ud & (bld.dispatch_width() - 1);
-         bld.MOV(retype(dest, value.type), component(value, i));
-         break;
-      }
-
-
-      /* When for some reason the subgroup_size picked by NIR is larger than
-       * the dispatch size picked by the backend (this could happen in RT,
-       * FS), bound the invocation to the dispatch size.
-       */
-      brw_reg bound_invocation = retype(invocation, BRW_TYPE_UD);
-      if (s.api_subgroup_size == 0 ||
-          bld.dispatch_width() < s.api_subgroup_size) {
-         bound_invocation =
-            bld.AND(bound_invocation, brw_imm_ud(s.dispatch_width - 1));
-      }
-
-      brw_reg tmp = bld.BROADCAST(value, bld.emit_uniformize(bound_invocation));
-
-      bld.MOV(retype(dest, value.type), tmp);
+      bld.emit(SHADER_OPCODE_READ_FROM_CHANNEL, retype(dest, value.type),
+               value, invocation);
       break;
    }
 
    case nir_intrinsic_read_first_invocation: {
       const brw_reg value = get_nir_src(ntb, instr->src[0]);
-      bld.MOV(retype(dest, value.type), bld.emit_uniformize(value));
+
+      bld.emit(SHADER_OPCODE_READ_FROM_LIVE_CHANNEL, retype(dest, value.type), value);
       break;
    }
 
diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp
index dbf799fd30c..78808344beb 100644
--- a/src/intel/compiler/brw_fs_validate.cpp
+++ b/src/intel/compiler/brw_fs_validate.cpp
@@ -239,6 +239,8 @@ brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
    case SHADER_OPCODE_VOTE_EQUAL:
    case SHADER_OPCODE_BALLOT:
    case SHADER_OPCODE_QUAD_SWAP:
+   case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
+   case SHADER_OPCODE_READ_FROM_CHANNEL:
       invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
       break;
 
diff --git a/src/intel/compiler/brw_lower_subgroup_ops.cpp b/src/intel/compiler/brw_lower_subgroup_ops.cpp
index 2cf38e92ddd..d53f703c1a3 100644
--- a/src/intel/compiler/brw_lower_subgroup_ops.cpp
+++ b/src/intel/compiler/brw_lower_subgroup_ops.cpp
@@ -595,6 +595,55 @@ brw_lower_quad_swap(fs_visitor &s, bblock_t *block, fs_inst *inst)
    return true;
 }
 
+static bool
+brw_lower_read_from_live_channel(fs_visitor &s, bblock_t *block, fs_inst *inst)
+{
+   const fs_builder bld(&s, block, inst);
+
+   assert(inst->sources == 1);
+   assert(inst->dst.type == inst->src[0].type);
+   brw_reg dst = inst->dst;
+   brw_reg value = inst->src[0];
+
+   bld.MOV(dst, bld.emit_uniformize(value));
+
+   inst->remove(block);
+   return true;
+}
+
+static bool
+brw_lower_read_from_channel(fs_visitor &s, bblock_t *block, fs_inst *inst)
+{
+   const fs_builder bld(&s, block, inst);
+
+   assert(inst->sources == 2);
+   assert(inst->dst.type == inst->src[0].type);
+
+   brw_reg dst = inst->dst;
+   brw_reg value = inst->src[0];
+   brw_reg index = retype(inst->src[1], BRW_TYPE_UD);
+
+   /* The subgroup size picked by NIR can be larger than the dispatch width
+    * picked by the backend (this could happen in RT, FS), so the channel
+    * index is bounded by masking it with (dispatch width - 1).
+    */
+   const unsigned dispatch_width_mask = s.dispatch_width - 1;
+
+   if (index.file == IMM) {
+      /* Immediate index: mask folds at compile time, always apply it. */
+      bld.MOV(dst, component(value, index.ud & dispatch_width_mask));
+   } else {
+      if (s.api_subgroup_size == 0 || s.dispatch_width < s.api_subgroup_size)
+         index = bld.AND(index, brw_imm_ud(dispatch_width_mask));
+
+      brw_reg tmp = bld.BROADCAST(value, bld.emit_uniformize(index));
+      bld.MOV(dst, tmp);
+   }
+
+   inst->remove(block);
+   return true;
+}
+
 bool
 brw_fs_lower_subgroup_ops(fs_visitor &s)
 {
@@ -625,6 +674,14 @@ brw_fs_lower_subgroup_ops(fs_visitor &s)
          progress |= brw_lower_quad_swap(s, block, inst);
          break;
 
+      case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
+         progress |= brw_lower_read_from_live_channel(s, block, inst);
+         break;
+
+      case SHADER_OPCODE_READ_FROM_CHANNEL:
+         progress |= brw_lower_read_from_channel(s, block, inst);
+         break;
+
       default:
          /* Nothing to do. */
          break;
diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp
index 423dc23180f..b6fe7fa28da 100644
--- a/src/intel/compiler/brw_print.cpp
+++ b/src/intel/compiler/brw_print.cpp
@@ -290,6 +290,10 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
       return "ballot";
    case SHADER_OPCODE_QUAD_SWAP:
       return "quad_swap";
+   case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
+      return "read_from_live_channel";
+   case SHADER_OPCODE_READ_FROM_CHANNEL:
+      return "read_from_channel";
    }
 
    unreachable("not reached");