intel/brw: Add SHADER_OPCODE_READ_FROM_CHANNEL and LIVE_CHANNEL

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32412>
This commit is contained in:
Caio Oliveira 2024-11-29 15:31:05 -08:00
parent 4d04396531
commit 93dfe504f2
6 changed files with 84 additions and 22 deletions

View file

@ -457,6 +457,21 @@ enum opcode {
*/
SHADER_OPCODE_QUAD_SWAP,
/* Read value from the first live channel and broadcast the result
* to all channels.
*
* Source 0: Value.
*/
SHADER_OPCODE_READ_FROM_LIVE_CHANNEL,
/* Read value from a specified channel and broadcast the result
* to all channels.
*
* Source 0: Value.
* Source 1: Index of the channel to pick value from.
*/
SHADER_OPCODE_READ_FROM_CHANNEL,
/* This turns into an align16 mov from src0 to dst with a swizzle
* provided as an immediate in src1.
*/

View file

@ -334,6 +334,8 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
case SHADER_OPCODE_VOTE_EQUAL:
case SHADER_OPCODE_BALLOT:
case SHADER_OPCODE_QUAD_SWAP:
case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
case SHADER_OPCODE_READ_FROM_CHANNEL:
return false;
default:
return true;

View file

@ -6633,33 +6633,15 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
const brw_reg value = get_nir_src(ntb, instr->src[0]);
const brw_reg invocation = get_nir_src_imm(ntb, instr->src[1]);
if (invocation.file == IMM) {
unsigned i = invocation.ud & (bld.dispatch_width() - 1);
bld.MOV(retype(dest, value.type), component(value, i));
break;
}
/* When for some reason the subgroup_size picked by NIR is larger than
* the dispatch size picked by the backend (this could happen in RT,
* FS), bound the invocation to the dispatch size.
*/
brw_reg bound_invocation = retype(invocation, BRW_TYPE_UD);
if (s.api_subgroup_size == 0 ||
bld.dispatch_width() < s.api_subgroup_size) {
bound_invocation =
bld.AND(bound_invocation, brw_imm_ud(s.dispatch_width - 1));
}
brw_reg tmp = bld.BROADCAST(value, bld.emit_uniformize(bound_invocation));
bld.MOV(retype(dest, value.type), tmp);
bld.emit(SHADER_OPCODE_READ_FROM_CHANNEL, retype(dest, value.type),
value, invocation);
break;
}
case nir_intrinsic_read_first_invocation: {
const brw_reg value = get_nir_src(ntb, instr->src[0]);
bld.MOV(retype(dest, value.type), bld.emit_uniformize(value));
bld.emit(SHADER_OPCODE_READ_FROM_LIVE_CHANNEL, retype(dest, value.type), value);
break;
}

View file

@ -239,6 +239,8 @@ brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
case SHADER_OPCODE_VOTE_EQUAL:
case SHADER_OPCODE_BALLOT:
case SHADER_OPCODE_QUAD_SWAP:
case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
case SHADER_OPCODE_READ_FROM_CHANNEL:
invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
break;

View file

@ -595,6 +595,55 @@ brw_lower_quad_swap(fs_visitor &s, bblock_t *block, fs_inst *inst)
return true;
}
/**
 * Lower a SHADER_OPCODE_READ_FROM_LIVE_CHANNEL instruction.
 *
 * The opcode takes a single source (the value) and its destination must
 * have the same type as that source.  The replacement sequence is built
 * with a builder anchored at \p inst: the value is passed through
 * emit_uniformize() and the result is moved into the original destination.
 * Afterwards \p inst is removed from \p block.
 *
 * \return always true, so the caller can record progress.
 */
static bool
brw_lower_read_from_live_channel(fs_visitor &s, bblock_t *block, fs_inst *inst)
{
   const fs_builder ibld(&s, block, inst);

   assert(inst->sources == 1);
   assert(inst->dst.type == inst->src[0].type);

   /* Uniformize first, then copy the uniform result to the destination. */
   brw_reg uniform_value = ibld.emit_uniformize(inst->src[0]);
   ibld.MOV(inst->dst, uniform_value);

   inst->remove(block);
   return true;
}
/**
 * Lower a SHADER_OPCODE_READ_FROM_CHANNEL instruction.
 *
 * Source 0 is the value and source 1 is the index of the channel to read
 * from (reinterpreted as UD here).  The destination must have the same
 * type as the value.
 *
 * - Immediate index: a single MOV from the selected component of the
 *   value, with the index always masked by (dispatch_width - 1).
 * - Non-immediate index: the index is optionally masked, passed through
 *   emit_uniformize(), used for a BROADCAST of the value, and the result
 *   is moved into the destination.
 *
 * In both cases \p inst is removed from \p block afterwards.
 *
 * \return always true, so the caller can record progress.
 */
static bool
brw_lower_read_from_channel(fs_visitor &s, bblock_t *block, fs_inst *inst)
{
   const fs_builder ibld(&s, block, inst);

   assert(inst->sources == 2);
   assert(inst->dst.type == inst->src[0].type);

   brw_reg result = inst->dst;
   brw_reg source = inst->src[0];
   brw_reg channel = retype(inst->src[1], BRW_TYPE_UD);

   /* The subgroup_size picked by NIR may end up larger than the dispatch
    * size picked by the backend (this could happen in RT, FS), so the
    * requested channel is bounded to the dispatch size.
    */
   const unsigned lane_mask = s.dispatch_width - 1;

   if (channel.file != IMM) {
      /* Only pay for the extra AND when the index could be out of range. */
      const bool needs_bound = s.api_subgroup_size == 0 ||
                               s.dispatch_width < s.api_subgroup_size;
      if (needs_bound)
         channel = ibld.AND(channel, brw_imm_ud(lane_mask));

      ibld.MOV(result, ibld.BROADCAST(source, ibld.emit_uniformize(channel)));
   } else {
      /* Masking an immediate costs nothing, so do it unconditionally. */
      ibld.MOV(result, component(source, channel.ud & lane_mask));
   }

   inst->remove(block);
   return true;
}
bool
brw_fs_lower_subgroup_ops(fs_visitor &s)
{
@ -625,6 +674,14 @@ brw_fs_lower_subgroup_ops(fs_visitor &s)
progress |= brw_lower_quad_swap(s, block, inst);
break;
case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
progress |= brw_lower_read_from_live_channel(s, block, inst);
break;
case SHADER_OPCODE_READ_FROM_CHANNEL:
progress |= brw_lower_read_from_channel(s, block, inst);
break;
default:
/* Nothing to do. */
break;

View file

@ -290,6 +290,10 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
return "ballot";
case SHADER_OPCODE_QUAD_SWAP:
return "quad_swap";
case SHADER_OPCODE_READ_FROM_LIVE_CHANNEL:
return "read_from_live_channel";
case SHADER_OPCODE_READ_FROM_CHANNEL:
return "read_from_channel";
}
unreachable("not reached");