diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 4fa245cc3a2..59211b747d3 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -451,6 +451,13 @@ enum opcode { */ SHADER_OPCODE_SEL_EXEC, + /* Swap values inside a quad based on the direction. + * + * Source 0: Value. + * Source 1: Immediate with brw_swap_direction. + */ + SHADER_OPCODE_QUAD_SWAP, + /* This turns into an align16 mov from src0 to dst with a swizzle * provided as an immediate in src1. */ @@ -723,6 +730,12 @@ enum brw_reduce_op { BRW_REDUCE_OP_XOR, }; +enum brw_swap_direction { /* Direction operand of SHADER_OPCODE_QUAD_SWAP, carried as its immediate source 1. */ + BRW_SWAP_HORIZONTAL, /* Each even channel trades places with the odd channel after it. */ + BRW_SWAP_VERTICAL, /* Quad channels 0<->2 and 1<->3 (lowered with swizzle 2,3,0,1 or lane index XOR 0x2). */ + BRW_SWAP_DIAGONAL, /* Quad channels 0<->3 and 1<->2 (lowered with swizzle 3,2,1,0 or lane index XOR 0x3). */ +}; + enum ENUM_PACKED brw_predicate { BRW_PREDICATE_NONE = 0, BRW_PREDICATE_NORMAL = 1, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 53f56457fb3..1f2300de031 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -249,6 +249,9 @@ fs_inst::is_control_source(unsigned arg) const arg != MEMORY_LOGICAL_DATA0 && arg != MEMORY_LOGICAL_DATA1; + case SHADER_OPCODE_QUAD_SWAP: + return arg == 1; + default: return false; } @@ -325,6 +328,7 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const case SHADER_OPCODE_VOTE_ALL: case SHADER_OPCODE_VOTE_EQUAL: case SHADER_OPCODE_BALLOT: + case SHADER_OPCODE_QUAD_SWAP: return false; default: return true; diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 7585aa0d112..71dcbaa4b00 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -649,6 +649,7 @@ instruction_requires_packed_data(fs_inst *inst) case FS_OPCODE_DDY_FINE: case FS_OPCODE_DDY_COARSE: case SHADER_OPCODE_QUAD_SWIZZLE: + case SHADER_OPCODE_QUAD_SWAP: return true; default: return false; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index
cd5af636716..657e1367cc4 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -6699,61 +6699,21 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb, break; } - case nir_intrinsic_quad_swap_horizontal: { - const brw_reg value = get_nir_src(ntb, instr->src[0]); - const brw_reg tmp = bld.vgrf(value.type); - - const fs_builder ubld = bld.exec_all().group(s.dispatch_width / 2, 0); - - const brw_reg src_left = horiz_stride(value, 2); - const brw_reg src_right = horiz_stride(horiz_offset(value, 1), 2); - const brw_reg tmp_left = horiz_stride(tmp, 2); - const brw_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); - - ubld.MOV(tmp_left, src_right); - ubld.MOV(tmp_right, src_left); - - bld.MOV(retype(dest, value.type), tmp); - break; - } - - case nir_intrinsic_quad_swap_vertical: { - const brw_reg value = get_nir_src(ntb, instr->src[0]); - if (nir_src_bit_size(instr->src[0]) == 32) { - /* For 32-bit, we can use a SIMD4x2 instruction to do this easily */ - const brw_reg tmp = bld.vgrf(value.type); - const fs_builder ubld = bld.exec_all(); - ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value, - brw_imm_ud(BRW_SWIZZLE4(2,3,0,1))); - bld.MOV(retype(dest, value.type), tmp); - } else { - /* For larger data types, we have to either emit dispatch_width many - * MOVs or else fall back to doing indirects. 
- */ - brw_reg idx = bld.vgrf(BRW_TYPE_W); - bld.XOR(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(0x2)); - bld.emit(SHADER_OPCODE_SHUFFLE, retype(dest, value.type), value, idx); - } - break; - } - + case nir_intrinsic_quad_swap_horizontal: + case nir_intrinsic_quad_swap_vertical: case nir_intrinsic_quad_swap_diagonal: { const brw_reg value = get_nir_src(ntb, instr->src[0]); - if (nir_src_bit_size(instr->src[0]) == 32) { - /* For 32-bit, we can use a SIMD4x2 instruction to do this easily */ - const brw_reg tmp = bld.vgrf(value.type); - const fs_builder ubld = bld.exec_all(); - ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value, - brw_imm_ud(BRW_SWIZZLE4(3,2,1,0))); - bld.MOV(retype(dest, value.type), tmp); - } else { - /* For larger data types, we have to either emit dispatch_width many - * MOVs or else fall back to doing indirects. - */ - brw_reg idx = bld.vgrf(BRW_TYPE_W); - bld.XOR(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(0x3)); - bld.emit(SHADER_OPCODE_SHUFFLE, retype(dest, value.type), value, idx); + + enum brw_swap_direction dir; + switch (instr->intrinsic) { + case nir_intrinsic_quad_swap_horizontal: dir = BRW_SWAP_HORIZONTAL; break; + case nir_intrinsic_quad_swap_vertical: dir = BRW_SWAP_VERTICAL; break; + case nir_intrinsic_quad_swap_diagonal: dir = BRW_SWAP_DIAGONAL; break; + default: unreachable("invalid quad swap"); } + + bld.emit(SHADER_OPCODE_QUAD_SWAP, retype(dest, value.type), + value, brw_imm_ud(dir)); break; } diff --git a/src/intel/compiler/brw_fs_validate.cpp b/src/intel/compiler/brw_fs_validate.cpp index 9fc3c45153f..dbf799fd30c 100644 --- a/src/intel/compiler/brw_fs_validate.cpp +++ b/src/intel/compiler/brw_fs_validate.cpp @@ -238,6 +238,7 @@ brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst) case SHADER_OPCODE_VOTE_ALL: case SHADER_OPCODE_VOTE_EQUAL: case SHADER_OPCODE_BALLOT: + case SHADER_OPCODE_QUAD_SWAP: invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING; break; diff --git 
a/src/intel/compiler/brw_lower_subgroup_ops.cpp b/src/intel/compiler/brw_lower_subgroup_ops.cpp index 409d00b0723..2cf38e92ddd 100644 --- a/src/intel/compiler/brw_lower_subgroup_ops.cpp +++ b/src/intel/compiler/brw_lower_subgroup_ops.cpp @@ -539,6 +539,62 @@ brw_lower_ballot(fs_visitor &s, bblock_t *block, fs_inst *inst) return true; } +static bool +brw_lower_quad_swap(fs_visitor &s, bblock_t *block, fs_inst *inst) +{ /* Lowers one SHADER_OPCODE_QUAD_SWAP: emits replacement instructions through a builder constructed from inst, chosen by the immediate direction in src[1], then removes inst from block and returns true. dst and src[0] must have the same type. */ + const fs_builder bld(&s, block, inst); + + assert(inst->dst.type == inst->src[0].type); + brw_reg dst = inst->dst; + brw_reg value = inst->src[0]; + + assert(inst->src[1].file == IMM); + enum brw_swap_direction dir = (enum brw_swap_direction)inst->src[1].ud; + + switch (dir) { + case BRW_SWAP_HORIZONTAL: { /* Two half-width exec_all MOVs over stride-2 regions: even channels of tmp take the odd channels of value and vice versa; a final MOV from the original builder copies tmp into dst. */ + const brw_reg tmp = bld.vgrf(value.type); + + const fs_builder ubld = bld.exec_all().group(s.dispatch_width / 2, 0); + + const brw_reg src_left = horiz_stride(value, 2); + const brw_reg src_right = horiz_stride(horiz_offset(value, 1), 2); + const brw_reg tmp_left = horiz_stride(tmp, 2); + const brw_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2); + + ubld.MOV(tmp_left, src_right); + ubld.MOV(tmp_right, src_left); + + bld.MOV(retype(dst, value.type), tmp); + break; + } + case BRW_SWAP_VERTICAL: + case BRW_SWAP_DIAGONAL: { + if (brw_type_size_bits(value.type) == 32) { + /* For 32-bit, we can use a SIMD4x2 instruction to do this easily: a quad swizzle of (2,3,0,1) for vertical or (3,2,1,0) for diagonal. */ + const unsigned swizzle = dir == BRW_SWAP_VERTICAL ? BRW_SWIZZLE4(2,3,0,1) + : BRW_SWIZZLE4(3,2,1,0); + const brw_reg tmp = bld.vgrf(value.type); + const fs_builder ubld = bld.exec_all(); + ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value, brw_imm_ud(swizzle)); + bld.MOV(dst, tmp); + } else { + /* For any other data size, we have to either emit dispatch_width many + * MOVs or else fall back to doing indirects. Here we shuffle, using the subgroup invocation index XOR 0x2 (vertical) or 0x3 (diagonal) as the source lane. + */ + const unsigned xor_mask = dir == BRW_SWAP_VERTICAL ?
0x2 : 0x3; + brw_reg idx = bld.vgrf(BRW_TYPE_W); + bld.XOR(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(xor_mask)); + bld.emit(SHADER_OPCODE_SHUFFLE, dst, value, idx); + } + break; + } + } + + inst->remove(block); /* The replacement has been emitted, so drop the QUAD_SWAP itself. */ + return true; +} + bool brw_fs_lower_subgroup_ops(fs_visitor &s) { @@ -565,6 +621,10 @@ brw_fs_lower_subgroup_ops(fs_visitor &s) progress |= brw_lower_ballot(s, block, inst); break; + case SHADER_OPCODE_QUAD_SWAP: + progress |= brw_lower_quad_swap(s, block, inst); + break; + default: /* Nothing to do. */ break; diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp index 6f10585b7d6..7b7c73b254e 100644 --- a/src/intel/compiler/brw_print.cpp +++ b/src/intel/compiler/brw_print.cpp @@ -305,6 +305,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) return "vote_equal"; case SHADER_OPCODE_BALLOT: return "ballot"; + case SHADER_OPCODE_QUAD_SWAP: + return "quad_swap"; } unreachable("not reached"); @@ -611,6 +613,19 @@ brw_print_instruction_to_file(const fs_visitor &s, const fs_inst *inst, FILE *fi fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type)); } + + if (inst->opcode == SHADER_OPCODE_QUAD_SWAP && i == 1) { + assert(inst->src[i].file == IMM); + const char *name = NULL; + switch (inst->src[i].ud) { + case BRW_SWAP_HORIZONTAL: name = "horizontal"; break; + case BRW_SWAP_VERTICAL: name = "vertical"; break; + case BRW_SWAP_DIAGONAL: name = "diagonal"; break; + default: + unreachable("invalid brw_swap_direction"); + } + fprintf(file, " (%s)", name); + } } fprintf(file, " ");