intel/brw: Add SHADER_OPCODE_QUAD_SWAP

The new opcode covers the horizontal, vertical and diagonal quad swap variants, selected by an immediate brw_swap_direction source.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31053>
This commit is contained in:
Caio Oliveira 2024-09-05 17:37:25 -07:00 committed by Marge Bot
parent 73fc29b25c
commit 8474dc853d
7 changed files with 106 additions and 52 deletions

View file

@ -451,6 +451,13 @@ enum opcode {
*/
SHADER_OPCODE_SEL_EXEC,
/* Swap values inside a quad based on the direction.
*
* Source 0: Value.
* Source 1: Immediate with brw_swap_direction.
*/
SHADER_OPCODE_QUAD_SWAP,
/* This turns into an align16 mov from src0 to dst with a swizzle
* provided as an immediate in src1.
*/
@ -723,6 +730,12 @@ enum brw_reduce_op {
BRW_REDUCE_OP_XOR,
};
/* Direction selector for SHADER_OPCODE_QUAD_SWAP, carried as an immediate
 * in source 1 of that instruction.
 */
enum brw_swap_direction {
   /* Exchange each even lane with the odd lane that follows it. */
   BRW_SWAP_HORIZONTAL = 0,

   /* Exchange lanes whose index within the quad differs by XOR 0x2
    * (swizzle 2,3,0,1).
    */
   BRW_SWAP_VERTICAL = 1,

   /* Exchange lanes whose index within the quad differs by XOR 0x3
    * (swizzle 3,2,1,0).
    */
   BRW_SWAP_DIAGONAL = 2,
};
enum ENUM_PACKED brw_predicate {
BRW_PREDICATE_NONE = 0,
BRW_PREDICATE_NORMAL = 1,

View file

@ -249,6 +249,9 @@ fs_inst::is_control_source(unsigned arg) const
arg != MEMORY_LOGICAL_DATA0 &&
arg != MEMORY_LOGICAL_DATA1;
case SHADER_OPCODE_QUAD_SWAP:
return arg == 1;
default:
return false;
}
@ -325,6 +328,7 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
case SHADER_OPCODE_VOTE_ALL:
case SHADER_OPCODE_VOTE_EQUAL:
case SHADER_OPCODE_BALLOT:
case SHADER_OPCODE_QUAD_SWAP:
return false;
default:
return true;

View file

@ -649,6 +649,7 @@ instruction_requires_packed_data(fs_inst *inst)
case FS_OPCODE_DDY_FINE:
case FS_OPCODE_DDY_COARSE:
case SHADER_OPCODE_QUAD_SWIZZLE:
case SHADER_OPCODE_QUAD_SWAP:
return true;
default:
return false;

View file

@ -6699,61 +6699,21 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
break;
}
case nir_intrinsic_quad_swap_horizontal: {
const brw_reg value = get_nir_src(ntb, instr->src[0]);
const brw_reg tmp = bld.vgrf(value.type);
const fs_builder ubld = bld.exec_all().group(s.dispatch_width / 2, 0);
const brw_reg src_left = horiz_stride(value, 2);
const brw_reg src_right = horiz_stride(horiz_offset(value, 1), 2);
const brw_reg tmp_left = horiz_stride(tmp, 2);
const brw_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2);
ubld.MOV(tmp_left, src_right);
ubld.MOV(tmp_right, src_left);
bld.MOV(retype(dest, value.type), tmp);
break;
}
case nir_intrinsic_quad_swap_vertical: {
const brw_reg value = get_nir_src(ntb, instr->src[0]);
if (nir_src_bit_size(instr->src[0]) == 32) {
/* For 32-bit, we can use a SIMD4x2 instruction to do this easily */
const brw_reg tmp = bld.vgrf(value.type);
const fs_builder ubld = bld.exec_all();
ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value,
brw_imm_ud(BRW_SWIZZLE4(2,3,0,1)));
bld.MOV(retype(dest, value.type), tmp);
} else {
/* For larger data types, we have to either emit dispatch_width many
* MOVs or else fall back to doing indirects.
*/
brw_reg idx = bld.vgrf(BRW_TYPE_W);
bld.XOR(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(0x2));
bld.emit(SHADER_OPCODE_SHUFFLE, retype(dest, value.type), value, idx);
}
break;
}
case nir_intrinsic_quad_swap_horizontal:
case nir_intrinsic_quad_swap_vertical:
case nir_intrinsic_quad_swap_diagonal: {
const brw_reg value = get_nir_src(ntb, instr->src[0]);
if (nir_src_bit_size(instr->src[0]) == 32) {
/* For 32-bit, we can use a SIMD4x2 instruction to do this easily */
const brw_reg tmp = bld.vgrf(value.type);
const fs_builder ubld = bld.exec_all();
ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value,
brw_imm_ud(BRW_SWIZZLE4(3,2,1,0)));
bld.MOV(retype(dest, value.type), tmp);
} else {
/* For larger data types, we have to either emit dispatch_width many
* MOVs or else fall back to doing indirects.
*/
brw_reg idx = bld.vgrf(BRW_TYPE_W);
bld.XOR(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(0x3));
bld.emit(SHADER_OPCODE_SHUFFLE, retype(dest, value.type), value, idx);
enum brw_swap_direction dir;
switch (instr->intrinsic) {
case nir_intrinsic_quad_swap_horizontal: dir = BRW_SWAP_HORIZONTAL; break;
case nir_intrinsic_quad_swap_vertical: dir = BRW_SWAP_VERTICAL; break;
case nir_intrinsic_quad_swap_diagonal: dir = BRW_SWAP_DIAGONAL; break;
default: unreachable("invalid quad swap");
}
bld.emit(SHADER_OPCODE_QUAD_SWAP, retype(dest, value.type),
value, brw_imm_ud(dir));
break;
}

View file

@ -238,6 +238,7 @@ brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
case SHADER_OPCODE_VOTE_ALL:
case SHADER_OPCODE_VOTE_EQUAL:
case SHADER_OPCODE_BALLOT:
case SHADER_OPCODE_QUAD_SWAP:
invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
break;

View file

@ -539,6 +539,62 @@ brw_lower_ballot(fs_visitor &s, bblock_t *block, fs_inst *inst)
return true;
}
/* Replaces a SHADER_OPCODE_QUAD_SWAP instruction with the MOV, QUAD_SWIZZLE
 * or SHUFFLE sequence that implements it, emitted at the position of the
 * original instruction, and then removes the original.
 *
 * Source 0 is the value to swap; source 1 must be an immediate holding a
 * brw_swap_direction.  The destination and source 0 are expected to have the
 * same type.  Always returns true.
 */
static bool
brw_lower_quad_swap(fs_visitor &s, bblock_t *block, fs_inst *inst)
{
   const fs_builder bld(&s, block, inst);

   assert(inst->dst.type == inst->src[0].type);
   brw_reg dest = inst->dst;
   brw_reg src = inst->src[0];

   assert(inst->src[1].file == IMM);
   const enum brw_swap_direction dir =
      (enum brw_swap_direction)inst->src[1].ud;

   switch (dir) {
   case BRW_SWAP_HORIZONTAL: {
      const brw_reg swapped = bld.vgrf(src.type);

      /* Two half-width MOVs with a stride of 2: the first fills the even
       * lanes of the temporary from the odd lanes of the source, the second
       * fills the odd lanes from the even ones.
       */
      const fs_builder half_bld =
         bld.exec_all().group(s.dispatch_width / 2, 0);

      half_bld.MOV(horiz_stride(swapped, 2),
                   horiz_stride(horiz_offset(src, 1), 2));
      half_bld.MOV(horiz_stride(horiz_offset(swapped, 1), 2),
                   horiz_stride(src, 2));

      /* Copy into the real destination with the original builder. */
      bld.MOV(retype(dest, src.type), swapped);
      break;
   }

   case BRW_SWAP_VERTICAL:
   case BRW_SWAP_DIAGONAL: {
      const bool vertical = dir == BRW_SWAP_VERTICAL;

      if (brw_type_size_bits(src.type) != 32) {
         /* Non-32-bit types: we would need either dispatch_width many MOVs
          * or an indirect access; use a shuffle indexed by the partner
          * lane, i.e. the invocation index XORed with the direction's mask.
          */
         unsigned lane_xor;
         if (vertical)
            lane_xor = 0x2;
         else
            lane_xor = 0x3;

         brw_reg partner = bld.vgrf(BRW_TYPE_W);
         bld.XOR(partner, bld.LOAD_SUBGROUP_INVOCATION(),
                 brw_imm_w(lane_xor));
         bld.emit(SHADER_OPCODE_SHUFFLE, dest, src, partner);
      } else {
         /* 32-bit types: a SIMD4x2 quad swizzle does the swap directly. */
         unsigned quad_swizzle;
         if (vertical)
            quad_swizzle = BRW_SWIZZLE4(2,3,0,1);
         else
            quad_swizzle = BRW_SWIZZLE4(3,2,1,0);

         const brw_reg swapped = bld.vgrf(src.type);
         const fs_builder all_bld = bld.exec_all();
         all_bld.emit(SHADER_OPCODE_QUAD_SWIZZLE, swapped, src,
                      brw_imm_ud(quad_swizzle));
         bld.MOV(dest, swapped);
      }
      break;
   }
   }

   /* The lowered sequence now stands in for the original instruction. */
   inst->remove(block);
   return true;
}
bool
brw_fs_lower_subgroup_ops(fs_visitor &s)
{
@ -565,6 +621,10 @@ brw_fs_lower_subgroup_ops(fs_visitor &s)
progress |= brw_lower_ballot(s, block, inst);
break;
case SHADER_OPCODE_QUAD_SWAP:
progress |= brw_lower_quad_swap(s, block, inst);
break;
default:
/* Nothing to do. */
break;

View file

@ -305,6 +305,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
return "vote_equal";
case SHADER_OPCODE_BALLOT:
return "ballot";
case SHADER_OPCODE_QUAD_SWAP:
return "quad_swap";
}
unreachable("not reached");
@ -611,6 +613,19 @@ brw_print_instruction_to_file(const fs_visitor &s, const fs_inst *inst, FILE *fi
fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type));
}
if (inst->opcode == SHADER_OPCODE_QUAD_SWAP && i == 1) {
assert(inst->src[i].file == IMM);
const char *name = NULL;
switch (inst->src[i].ud) {
case BRW_SWAP_HORIZONTAL: name = "horizontal"; break;
case BRW_SWAP_VERTICAL: name = "vertical"; break;
case BRW_SWAP_DIAGONAL: name = "diagonal"; break;
default:
unreachable("invalid brw_swap_direction");
}
fprintf(file, " (%s)", name);
}
}
fprintf(file, " ");