/* Patch summary: introduces the SHADER_OPCODE_BALLOT pseudo-opcode.  The NIR
 * front end (brw_fs_nir.cpp) now emits it instead of open-coding the ballot
 * sequence, and brw_fs_lower_subgroup_ops() expands it via the new
 * brw_lower_ballot() helper.  The opcode is also added to the switch
 * statements in can_do_source_mods(), try_constant_propagate_value(),
 * brw_validate_instruction_phase() and brw_instruction_name().
 */
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index 29c781562f4..4fa245cc3a2 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -437,6 +437,13 @@ enum opcode {
     */
    SHADER_OPCODE_VOTE_EQUAL,
 
+   /* Produces a mask from the boolean value of each channel, and broadcasts
+    * the result to all channels.
+    *
+    * Source 0: Boolean value.
+    */
+   SHADER_OPCODE_BALLOT,
+
    /* Select between src0 and src1 based on channel enables.
     *
     * This instruction copies src0 into the enabled channels of the
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 6d6daade4d2..53f56457fb3 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -324,6 +324,7 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
    case SHADER_OPCODE_VOTE_ANY:
    case SHADER_OPCODE_VOTE_ALL:
    case SHADER_OPCODE_VOTE_EQUAL:
+   case SHADER_OPCODE_BALLOT:
       return false;
    default:
       return true;
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index d9dcc72adba..7585aa0d112 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -1216,6 +1216,7 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type,
       case BRW_OPCODE_LRP:
       case FS_OPCODE_PACK_HALF_2x16_SPLIT:
       case SHADER_OPCODE_SHUFFLE:
+      case SHADER_OPCODE_BALLOT:
          inst->src[arg] = val;
          progress = true;
          break;
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 433e5b67f2e..cd5af636716 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -6627,25 +6627,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
          dest.type = BRW_TYPE_UD;
       }
 
-      /* Implement a fast-path for ballot(true). */
-      if (nir_src_is_const(instr->src[0]) &&
-          nir_src_as_bool(instr->src[0])) {
-         brw_reg tmp = bld.vgrf(BRW_TYPE_UD);
-         bld.exec_all().emit(SHADER_OPCODE_LOAD_LIVE_CHANNELS, tmp);
-         bld.MOV(dest, brw_reg(component(tmp, 0)));
-         break;
-      }
-
-      const brw_reg value = retype(get_nir_src(ntb, instr->src[0]),
-                                   BRW_TYPE_UD);
-      struct brw_reg flag = brw_flag_reg(0, 0);
-
-      if (s.dispatch_width == 32)
-         flag.type = BRW_TYPE_UD;
-
-      bld.exec_all().group(1, 0).MOV(flag, retype(brw_imm_ud(0u), flag.type));
-      bld.CMP(bld.null_reg_ud(), value, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);
-      bld.MOV(dest, flag);
+      const brw_reg value = get_nir_src(ntb, instr->src[0]);
+      bld.emit(SHADER_OPCODE_BALLOT, dest, value);
       break;
    }
 
diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp
index 2d60c50b504..9fc3c45153f 100644
--- a/src/intel/compiler/brw_fs_validate.cpp
+++ b/src/intel/compiler/brw_fs_validate.cpp
@@ -237,6 +237,7 @@ brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
       case SHADER_OPCODE_VOTE_ANY:
       case SHADER_OPCODE_VOTE_ALL:
       case SHADER_OPCODE_VOTE_EQUAL:
+      case SHADER_OPCODE_BALLOT:
          invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
          break;
 
diff --git a/src/intel/compiler/brw_lower_subgroup_ops.cpp b/src/intel/compiler/brw_lower_subgroup_ops.cpp
index 680ff51c177..409d00b0723 100644
--- a/src/intel/compiler/brw_lower_subgroup_ops.cpp
+++ b/src/intel/compiler/brw_lower_subgroup_ops.cpp
@@ -511,6 +511,34 @@ brw_lower_vote(fs_visitor &s, bblock_t *block, fs_inst *inst)
    return true;
 }
 
+static bool
+brw_lower_ballot(fs_visitor &s, bblock_t *block, fs_inst *inst)
+{
+   const fs_builder bld(&s, block, inst);
+
+   brw_reg value = retype(inst->src[0], BRW_TYPE_UD);
+   brw_reg dst = inst->dst;
+
+   if (value.file == IMM) {
+      /* Constant source: fast paths for ballot(true) and ballot(false). */
+      if (!value.is_zero()) {
+         brw_reg tmp = bld.vgrf(BRW_TYPE_UD);
+         bld.exec_all().emit(SHADER_OPCODE_LOAD_LIVE_CHANNELS, tmp);
+         bld.MOV(dst, brw_reg(component(tmp, 0)));
+      } else {
+         brw_reg zero = retype(brw_imm_uq(0), dst.type);
+         bld.MOV(dst, zero);
+      }
+   } else { /* Non-constant source: compare against zero, copy the flag. */
+      brw_reg flag = brw_fill_flag(bld, 0);
+      bld.CMP(bld.null_reg_ud(), value, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);
+      bld.MOV(dst, flag);
+   }
+
+   inst->remove(block);
+   return true;
+}
+
 bool
 brw_fs_lower_subgroup_ops(fs_visitor &s)
 {
@@ -533,6 +561,10 @@ brw_fs_lower_subgroup_ops(fs_visitor &s)
          progress |= brw_lower_vote(s, block, inst);
          break;
 
+      case SHADER_OPCODE_BALLOT:
+         progress |= brw_lower_ballot(s, block, inst);
+         break;
+
       default:
          /* Nothing to do. */
          break;
diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp
index 84291b85879..6f10585b7d6 100644
--- a/src/intel/compiler/brw_print.cpp
+++ b/src/intel/compiler/brw_print.cpp
@@ -303,6 +303,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
       return "vote_all";
    case SHADER_OPCODE_VOTE_EQUAL:
       return "vote_equal";
+   case SHADER_OPCODE_BALLOT:
+      return "ballot";
    }
 
    unreachable("not reached");