intel/brw: Add SHADER_OPCODE_BALLOT

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31052>
This commit is contained in:
Caio Oliveira 2024-09-05 09:23:11 -07:00 committed by Marge Bot
parent 50e7a8ef25
commit 2bd7592b0b
7 changed files with 46 additions and 19 deletions

View file

@ -437,6 +437,13 @@ enum opcode {
*/ */
SHADER_OPCODE_VOTE_EQUAL, SHADER_OPCODE_VOTE_EQUAL,
/* Produces a mask from the boolean values of all channels and broadcasts
* the result to all channels.
*
* Source 0: Boolean value.
*/
SHADER_OPCODE_BALLOT,
/* Select between src0 and src1 based on channel enables. /* Select between src0 and src1 based on channel enables.
* *
* This instruction copies src0 into the enabled channels of the * This instruction copies src0 into the enabled channels of the

View file

@ -324,6 +324,7 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
case SHADER_OPCODE_VOTE_ANY: case SHADER_OPCODE_VOTE_ANY:
case SHADER_OPCODE_VOTE_ALL: case SHADER_OPCODE_VOTE_ALL:
case SHADER_OPCODE_VOTE_EQUAL: case SHADER_OPCODE_VOTE_EQUAL:
case SHADER_OPCODE_BALLOT:
return false; return false;
default: default:
return true; return true;

View file

@ -1216,6 +1216,7 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type,
case BRW_OPCODE_LRP: case BRW_OPCODE_LRP:
case FS_OPCODE_PACK_HALF_2x16_SPLIT: case FS_OPCODE_PACK_HALF_2x16_SPLIT:
case SHADER_OPCODE_SHUFFLE: case SHADER_OPCODE_SHUFFLE:
case SHADER_OPCODE_BALLOT:
inst->src[arg] = val; inst->src[arg] = val;
progress = true; progress = true;
break; break;

View file

@ -6627,25 +6627,8 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
dest.type = BRW_TYPE_UD; dest.type = BRW_TYPE_UD;
} }
/* Implement a fast-path for ballot(true). */ const brw_reg value = get_nir_src(ntb, instr->src[0]);
if (nir_src_is_const(instr->src[0]) && bld.emit(SHADER_OPCODE_BALLOT, dest, value);
nir_src_as_bool(instr->src[0])) {
brw_reg tmp = bld.vgrf(BRW_TYPE_UD);
bld.exec_all().emit(SHADER_OPCODE_LOAD_LIVE_CHANNELS, tmp);
bld.MOV(dest, brw_reg(component(tmp, 0)));
break;
}
const brw_reg value = retype(get_nir_src(ntb, instr->src[0]),
BRW_TYPE_UD);
struct brw_reg flag = brw_flag_reg(0, 0);
if (s.dispatch_width == 32)
flag.type = BRW_TYPE_UD;
bld.exec_all().group(1, 0).MOV(flag, retype(brw_imm_ud(0u), flag.type));
bld.CMP(bld.null_reg_ud(), value, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);
bld.MOV(dest, flag);
break; break;
} }

View file

@ -237,6 +237,7 @@ brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
case SHADER_OPCODE_VOTE_ANY: case SHADER_OPCODE_VOTE_ANY:
case SHADER_OPCODE_VOTE_ALL: case SHADER_OPCODE_VOTE_ALL:
case SHADER_OPCODE_VOTE_EQUAL: case SHADER_OPCODE_VOTE_EQUAL:
case SHADER_OPCODE_BALLOT:
invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING; invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
break; break;

View file

@ -511,6 +511,34 @@ brw_lower_vote(fs_visitor &s, bblock_t *block, fs_inst *inst)
return true; return true;
} }
/* Replace a SHADER_OPCODE_BALLOT instruction with lower-level instructions.
 *
 * The replacement is emitted through a builder constructed from `inst`,
 * after which `inst` is removed from `block`.  Source 0 is reinterpreted as
 * UD before being inspected.  Three cases are handled:
 *
 *   - non-immediate source: fill flag / CMP against zero / MOV flag to dst;
 *   - immediate zero:       MOV of a zero immediate (retyped to dst's type);
 *   - immediate non-zero:   LOAD_LIVE_CHANNELS into a temporary, then MOV of
 *                           its component 0 to dst.
 *
 * Always returns true, since the instruction is unconditionally replaced.
 */
static bool
brw_lower_ballot(fs_visitor &s, bblock_t *block, fs_inst *inst)
{
   const fs_builder ibld(&s, block, inst);

   brw_reg result = inst->dst;
   brw_reg pred = retype(inst->src[0], BRW_TYPE_UD);

   if (pred.file != IMM) {
      /* General case: the flag register is first filled with 0, then a
       * CMP.NZ of the source against zero is emitted with a null
       * destination, and finally the flag is copied to the result.
       * NOTE(review): presumably the CMP leaves the per-channel outcome in
       * that same flag register -- confirm against brw_fill_flag().
       */
      brw_reg flag = brw_fill_flag(ibld, 0);
      ibld.CMP(ibld.null_reg_ud(), pred, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);
      ibld.MOV(result, flag);
   } else if (pred.is_zero()) {
      /* Constant false source: the result is simply zero. */
      ibld.MOV(result, retype(brw_imm_uq(0), result.type));
   } else {
      /* Fast path for ballot(true): load the live channels with exec_all
       * and copy the first component of that temporary to the result.
       */
      brw_reg live = ibld.vgrf(BRW_TYPE_UD);
      ibld.exec_all().emit(SHADER_OPCODE_LOAD_LIVE_CHANNELS, live);
      ibld.MOV(result, brw_reg(component(live, 0)));
   }

   /* The original ballot instruction is no longer needed. */
   inst->remove(block);
   return true;
}
bool bool
brw_fs_lower_subgroup_ops(fs_visitor &s) brw_fs_lower_subgroup_ops(fs_visitor &s)
{ {
@ -533,6 +561,10 @@ brw_fs_lower_subgroup_ops(fs_visitor &s)
progress |= brw_lower_vote(s, block, inst); progress |= brw_lower_vote(s, block, inst);
break; break;
case SHADER_OPCODE_BALLOT:
progress |= brw_lower_ballot(s, block, inst);
break;
default: default:
/* Nothing to do. */ /* Nothing to do. */
break; break;

View file

@ -303,6 +303,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
return "vote_all"; return "vote_all";
case SHADER_OPCODE_VOTE_EQUAL: case SHADER_OPCODE_VOTE_EQUAL:
return "vote_equal"; return "vote_equal";
case SHADER_OPCODE_BALLOT:
return "ballot";
} }
unreachable("not reached"); unreachable("not reached");