mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 18:00:13 +01:00
intel/brw: Add SHADER_OPCODE_*_SCAN
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30496>
This commit is contained in:
parent
9537b62759
commit
0ba1159b0a
6 changed files with 71 additions and 101 deletions
|
|
@ -412,6 +412,15 @@ enum opcode {
|
|||
*/
|
||||
SHADER_OPCODE_REDUCE,
|
||||
|
||||
/* Combine values of previous channels using an operation. Inclusive scan
|
||||
* will include the value of the channel itself in the channel result.
|
||||
*
|
||||
* Source 0: Value.
|
||||
* Source 1: Immediate with brw_reduce_op.
|
||||
*/
|
||||
SHADER_OPCODE_INCLUSIVE_SCAN,
|
||||
SHADER_OPCODE_EXCLUSIVE_SCAN,
|
||||
|
||||
/* Select between src0 and src1 based on channel enables.
|
||||
*
|
||||
* This instruction copies src0 into the enabled channels of the
|
||||
|
|
|
|||
|
|
@ -319,6 +319,8 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
|
|||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
case SHADER_OPCODE_REDUCE:
|
||||
case SHADER_OPCODE_INCLUSIVE_SCAN:
|
||||
case SHADER_OPCODE_EXCLUSIVE_SCAN:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -4680,35 +4680,6 @@ fs_nir_emit_bs_intrinsic(nir_to_brw_state &ntb,
|
|||
}
|
||||
}
|
||||
|
||||
/* Build an immediate register holding the identity element of the NIR
 * binary operation `op` for values of register type `type`.
 *
 * The identity value comes from nir_alu_binop_identity(), queried at the bit
 * size of `type`, and is then wrapped in an immediate chosen by the byte size
 * of `type`.  `bld` is not referenced in the body.
 *
 * Hits unreachable() for any type whose size is not 1, 2, 4 or 8 bytes.
 */
static brw_reg
|
||||
brw_nir_reduction_op_identity(const fs_builder &bld,
|
||||
nir_op op, brw_reg_type type)
|
||||
{
|
||||
nir_const_value value =
|
||||
nir_alu_binop_identity(op, brw_type_size_bits(type));
|
||||
|
||||
switch (brw_type_size_bytes(type)) {
|
||||
case 1:
|
||||
/* Byte-sized identities are returned as 16-bit (UW/W) immediates and are
 * not retyped back to the byte type.
 * NOTE(review): presumably because byte immediates are not usable here;
 * confirm against the immediate helpers.
 */
if (type == BRW_TYPE_UB) {
|
||||
return brw_imm_uw(value.u8);
|
||||
} else {
|
||||
assert(type == BRW_TYPE_B);
|
||||
return brw_imm_w(value.i8);
|
||||
}
|
||||
case 2:
|
||||
return retype(brw_imm_uw(value.u16), type);
|
||||
case 4:
|
||||
return retype(brw_imm_ud(value.u32), type);
|
||||
case 8:
|
||||
/* DF gets a dedicated double immediate; every other 8-byte type reuses the
 * raw 64-bit pattern retyped to `type`.
 */
if (type == BRW_TYPE_DF)
|
||||
return brw_imm_df(value.f64);
|
||||
else
|
||||
return retype(brw_imm_u64(value.u64), type);
|
||||
default:
|
||||
unreachable("Invalid type size");
|
||||
}
|
||||
}
|
||||
|
||||
static brw_reduce_op
|
||||
brw_reduce_op_for_nir_reduction_op(nir_op op)
|
||||
{
|
||||
|
|
@ -4731,50 +4702,6 @@ brw_reduce_op_for_nir_reduction_op(nir_op op)
|
|||
}
|
||||
}
|
||||
|
||||
/* Map a NIR reduction operation to the BRW opcode used to combine two
 * values:
 *
 *   iadd/fadd                -> BRW_OPCODE_ADD
 *   imul/fmul                -> BRW_OPCODE_MUL
 *   {i,u,f}min, {i,u,f}max   -> BRW_OPCODE_SEL
 *   iand / ior / ixor        -> BRW_OPCODE_AND / OR / XOR
 *
 * Min and max both map to SEL, so the opcode alone does not distinguish
 * them.  Any other nir_op hits unreachable().
 */
static opcode
|
||||
brw_op_for_nir_reduction_op(nir_op op)
|
||||
{
|
||||
switch (op) {
|
||||
case nir_op_iadd: return BRW_OPCODE_ADD;
|
||||
case nir_op_fadd: return BRW_OPCODE_ADD;
|
||||
case nir_op_imul: return BRW_OPCODE_MUL;
|
||||
case nir_op_fmul: return BRW_OPCODE_MUL;
|
||||
case nir_op_imin: return BRW_OPCODE_SEL;
|
||||
case nir_op_umin: return BRW_OPCODE_SEL;
|
||||
case nir_op_fmin: return BRW_OPCODE_SEL;
|
||||
case nir_op_imax: return BRW_OPCODE_SEL;
|
||||
case nir_op_umax: return BRW_OPCODE_SEL;
|
||||
case nir_op_fmax: return BRW_OPCODE_SEL;
|
||||
case nir_op_iand: return BRW_OPCODE_AND;
|
||||
case nir_op_ior: return BRW_OPCODE_OR;
|
||||
case nir_op_ixor: return BRW_OPCODE_XOR;
|
||||
default:
|
||||
unreachable("Invalid reduction operation");
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the conditional modifier that goes with a NIR reduction operation:
 *
 *   {i,u,f}min  -> BRW_CONDITIONAL_L
 *   {i,u,f}max  -> BRW_CONDITIONAL_GE
 *   add, mul, and, or, xor -> BRW_CONDITIONAL_NONE
 *
 * Any other nir_op hits unreachable().
 */
static brw_conditional_mod
|
||||
brw_cond_mod_for_nir_reduction_op(nir_op op)
|
||||
{
|
||||
switch (op) {
|
||||
case nir_op_iadd: return BRW_CONDITIONAL_NONE;
|
||||
case nir_op_fadd: return BRW_CONDITIONAL_NONE;
|
||||
case nir_op_imul: return BRW_CONDITIONAL_NONE;
|
||||
case nir_op_fmul: return BRW_CONDITIONAL_NONE;
|
||||
case nir_op_imin: return BRW_CONDITIONAL_L;
|
||||
case nir_op_umin: return BRW_CONDITIONAL_L;
|
||||
case nir_op_fmin: return BRW_CONDITIONAL_L;
|
||||
case nir_op_imax: return BRW_CONDITIONAL_GE;
|
||||
case nir_op_umax: return BRW_CONDITIONAL_GE;
|
||||
case nir_op_fmax: return BRW_CONDITIONAL_GE;
|
||||
case nir_op_iand: return BRW_CONDITIONAL_NONE;
|
||||
case nir_op_ior: return BRW_CONDITIONAL_NONE;
|
||||
case nir_op_ixor: return BRW_CONDITIONAL_NONE;
|
||||
default:
|
||||
unreachable("Invalid reduction operation");
|
||||
}
|
||||
}
|
||||
|
||||
struct rebuild_resource {
|
||||
unsigned idx;
|
||||
std::vector<nir_def *> array;
|
||||
|
|
@ -7074,40 +7001,18 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
case nir_intrinsic_inclusive_scan:
|
||||
case nir_intrinsic_exclusive_scan: {
|
||||
brw_reg src = get_nir_src(ntb, instr->src[0]);
|
||||
nir_op redop = (nir_op)nir_intrinsic_reduction_op(instr);
|
||||
nir_op op = (nir_op)nir_intrinsic_reduction_op(instr);
|
||||
enum brw_reduce_op brw_op = brw_reduce_op_for_nir_reduction_op(op);
|
||||
|
||||
/* Figure out the source type */
|
||||
src.type = brw_type_for_nir_type(devinfo,
|
||||
(nir_alu_type)(nir_op_infos[redop].input_types[0] |
|
||||
(nir_alu_type)(nir_op_infos[op].input_types[0] |
|
||||
nir_src_bit_size(instr->src[0])));
|
||||
|
||||
brw_reg identity = brw_nir_reduction_op_identity(bld, redop, src.type);
|
||||
opcode brw_op = brw_op_for_nir_reduction_op(redop);
|
||||
brw_conditional_mod cond_mod = brw_cond_mod_for_nir_reduction_op(redop);
|
||||
enum opcode opcode = instr->intrinsic == nir_intrinsic_exclusive_scan ?
|
||||
SHADER_OPCODE_EXCLUSIVE_SCAN : SHADER_OPCODE_INCLUSIVE_SCAN;
|
||||
|
||||
/* Set up a register for all of our scratching around and initialize it
|
||||
* to reduction operation's identity value.
|
||||
*/
|
||||
brw_reg scan = bld.vgrf(src.type);
|
||||
const fs_builder allbld = bld.exec_all();
|
||||
allbld.emit(SHADER_OPCODE_SEL_EXEC, scan, src, identity);
|
||||
|
||||
if (instr->intrinsic == nir_intrinsic_exclusive_scan) {
|
||||
/* Exclusive scan is a bit harder because we have to do an annoying
|
||||
* shift of the contents before we can begin. To make things worse,
|
||||
* we can't do this with a normal stride; we have to use indirects.
|
||||
*/
|
||||
brw_reg shifted = bld.vgrf(src.type);
|
||||
brw_reg idx = bld.vgrf(BRW_TYPE_W);
|
||||
allbld.ADD(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(-1));
|
||||
allbld.emit(SHADER_OPCODE_SHUFFLE, shifted, scan, idx);
|
||||
allbld.group(1, 0).MOV(horiz_offset(shifted, 0), identity);
|
||||
scan = shifted;
|
||||
}
|
||||
|
||||
bld.emit_scan(brw_op, scan, s.dispatch_width, cond_mod);
|
||||
|
||||
bld.MOV(retype(dest, src.type), scan);
|
||||
bld.emit(opcode, retype(dest, src.type), src, brw_imm_ud(brw_op));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -232,6 +232,8 @@ brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
|
|||
case SHADER_OPCODE_URB_READ_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_REDUCE:
|
||||
case SHADER_OPCODE_INCLUSIVE_SCAN:
|
||||
case SHADER_OPCODE_EXCLUSIVE_SCAN:
|
||||
invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -173,6 +173,49 @@ brw_lower_reduce(fs_visitor &s, bblock_t *block, fs_inst *inst)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Lower a SHADER_OPCODE_INCLUSIVE_SCAN or SHADER_OPCODE_EXCLUSIVE_SCAN
 * instruction into an explicit instruction sequence.
 *
 * Expected operands (both checked with assert):
 *   - dst has the same type as src[0], the value being scanned;
 *   - src[1] is an immediate holding a brw_reduce_op.
 *
 * The sequence is built with a builder constructed from (s, block, inst).
 * It copies the source into a scratch register initialised with the
 * operation's identity, shifts it by one channel for the exclusive variant,
 * runs emit_scan() over it, and moves the result to dst.  `inst` is then
 * removed from `block`.
 *
 * Always returns true.
 */
static bool
|
||||
brw_lower_scan(fs_visitor &s, bblock_t *block, fs_inst *inst)
|
||||
{
|
||||
const fs_builder bld(&s, block, inst);
|
||||
|
||||
/* The scan is computed in the source type and moved into dst unchanged, so
 * the two types have to agree.
 */
assert(inst->dst.type == inst->src[0].type);
|
||||
brw_reg dst = inst->dst;
|
||||
brw_reg src = inst->src[0];
|
||||
|
||||
/* Source 1 carries the reduction operation as an immediate. */
assert(inst->src[1].file == IMM);
|
||||
enum brw_reduce_op op = (enum brw_reduce_op)inst->src[1].ud;
|
||||
|
||||
/* Supplies the identity, opcode and conditional modifier used below. */
struct brw_reduction_info info = brw_get_reduction_info(op, src.type);
|
||||
|
||||
/* Set up a register for all of our scratching around and initialize it
|
||||
* to reduction operation's identity value.
|
||||
*/
|
||||
brw_reg scan = bld.vgrf(src.type);
|
||||
const fs_builder ubld = bld.exec_all();
|
||||
ubld.emit(SHADER_OPCODE_SEL_EXEC, scan, src, info.identity);
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_EXCLUSIVE_SCAN) {
|
||||
/* Exclusive scan is a bit harder because we have to do an annoying
|
||||
* shift of the contents before we can begin. To make things worse,
|
||||
* we can't do this with a normal stride; we have to use indirects.
|
||||
*/
|
||||
brw_reg shifted = bld.vgrf(src.type);
|
||||
brw_reg idx = bld.vgrf(BRW_TYPE_W);
|
||||
|
||||
/* idx = subgroup invocation - 1, used as the shuffle index so that each
 * channel picks up the value of the channel before it.
 * NOTE(review): relies on SHADER_OPCODE_SHUFFLE reading channel idx of its
 * first source; confirm against the opcode's definition.
 */
ubld.ADD(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(-1));
|
||||
ubld.emit(SHADER_OPCODE_SHUFFLE, shifted, scan, idx);
|
||||
/* Channel 0 has no previous channel (its index would be -1), so it is
 * overwritten with the identity.
 */
ubld.group(1, 0).MOV(horiz_offset(shifted, 0), info.identity);
|
||||
scan = shifted;
|
||||
}
|
||||
|
||||
bld.emit_scan(info.op, scan, s.dispatch_width, info.cond_mod);
|
||||
|
||||
bld.MOV(dst, scan);
|
||||
|
||||
/* The sequence emitted above replaces the original instruction. */
inst->remove(block);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_fs_lower_subgroup_ops(fs_visitor &s)
|
||||
{
|
||||
|
|
@ -184,6 +227,11 @@ brw_fs_lower_subgroup_ops(fs_visitor &s)
|
|||
progress |= brw_lower_reduce(s, block, inst);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_INCLUSIVE_SCAN:
|
||||
case SHADER_OPCODE_EXCLUSIVE_SCAN:
|
||||
progress |= brw_lower_scan(s, block, inst);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Nothing to do. */
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -293,6 +293,10 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
|||
return "memory_atomic";
|
||||
case SHADER_OPCODE_REDUCE:
|
||||
return "reduce";
|
||||
case SHADER_OPCODE_INCLUSIVE_SCAN:
|
||||
return "inclusive_scan";
|
||||
case SHADER_OPCODE_EXCLUSIVE_SCAN:
|
||||
return "exclusive_scan";
|
||||
}
|
||||
|
||||
unreachable("not reached");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue