mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-27 08:20:12 +01:00
intel/brw: Add SHADER_OPCODE_QUAD_SWAP
For the horizontal, vertical and diagonal variants.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31053>
This commit is contained in:
parent
73fc29b25c
commit
8474dc853d
7 changed files with 106 additions and 52 deletions
|
|
@ -451,6 +451,13 @@ enum opcode {
|
|||
*/
|
||||
SHADER_OPCODE_SEL_EXEC,
|
||||
|
||||
/* Swap values inside a quad based on the direction.
|
||||
*
|
||||
* Source 0: Value.
|
||||
* Source 1: Immediate with brw_swap_direction.
|
||||
*/
|
||||
SHADER_OPCODE_QUAD_SWAP,
|
||||
|
||||
/* This turns into an align16 mov from src0 to dst with a swizzle
|
||||
* provided as an immediate in src1.
|
||||
*/
|
||||
|
|
@ -723,6 +730,12 @@ enum brw_reduce_op {
|
|||
BRW_REDUCE_OP_XOR,
|
||||
};
|
||||
|
||||
/* Direction selector for SHADER_OPCODE_QUAD_SWAP, passed to that opcode as
 * the immediate in source 1.
 *
 * As lowered by brw_lower_quad_swap():
 *  - BRW_SWAP_HORIZONTAL: even channels and the odd channels that follow
 *    them exchange values (stride-2 copies offset by one channel).
 *  - BRW_SWAP_VERTICAL:   uses quad swizzle (2,3,0,1), or a shuffle index of
 *    (invocation XOR 0x2) for non-32-bit types.
 *  - BRW_SWAP_DIAGONAL:   uses quad swizzle (3,2,1,0), or a shuffle index of
 *    (invocation XOR 0x3) for non-32-bit types.
 */
enum brw_swap_direction {
|
||||
BRW_SWAP_HORIZONTAL,
|
||||
BRW_SWAP_VERTICAL,
|
||||
BRW_SWAP_DIAGONAL,
|
||||
};
|
||||
|
||||
enum ENUM_PACKED brw_predicate {
|
||||
BRW_PREDICATE_NONE = 0,
|
||||
BRW_PREDICATE_NORMAL = 1,
|
||||
|
|
|
|||
|
|
@ -249,6 +249,9 @@ fs_inst::is_control_source(unsigned arg) const
|
|||
arg != MEMORY_LOGICAL_DATA0 &&
|
||||
arg != MEMORY_LOGICAL_DATA1;
|
||||
|
||||
case SHADER_OPCODE_QUAD_SWAP:
|
||||
return arg == 1;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
@ -325,6 +328,7 @@ fs_inst::can_do_source_mods(const struct intel_device_info *devinfo) const
|
|||
case SHADER_OPCODE_VOTE_ALL:
|
||||
case SHADER_OPCODE_VOTE_EQUAL:
|
||||
case SHADER_OPCODE_BALLOT:
|
||||
case SHADER_OPCODE_QUAD_SWAP:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -649,6 +649,7 @@ instruction_requires_packed_data(fs_inst *inst)
|
|||
case FS_OPCODE_DDY_FINE:
|
||||
case FS_OPCODE_DDY_COARSE:
|
||||
case SHADER_OPCODE_QUAD_SWIZZLE:
|
||||
case SHADER_OPCODE_QUAD_SWAP:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -6699,61 +6699,21 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_quad_swap_horizontal: {
|
||||
const brw_reg value = get_nir_src(ntb, instr->src[0]);
|
||||
const brw_reg tmp = bld.vgrf(value.type);
|
||||
|
||||
const fs_builder ubld = bld.exec_all().group(s.dispatch_width / 2, 0);
|
||||
|
||||
const brw_reg src_left = horiz_stride(value, 2);
|
||||
const brw_reg src_right = horiz_stride(horiz_offset(value, 1), 2);
|
||||
const brw_reg tmp_left = horiz_stride(tmp, 2);
|
||||
const brw_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2);
|
||||
|
||||
ubld.MOV(tmp_left, src_right);
|
||||
ubld.MOV(tmp_right, src_left);
|
||||
|
||||
bld.MOV(retype(dest, value.type), tmp);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_quad_swap_vertical: {
|
||||
const brw_reg value = get_nir_src(ntb, instr->src[0]);
|
||||
if (nir_src_bit_size(instr->src[0]) == 32) {
|
||||
/* For 32-bit, we can use a SIMD4x2 instruction to do this easily */
|
||||
const brw_reg tmp = bld.vgrf(value.type);
|
||||
const fs_builder ubld = bld.exec_all();
|
||||
ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value,
|
||||
brw_imm_ud(BRW_SWIZZLE4(2,3,0,1)));
|
||||
bld.MOV(retype(dest, value.type), tmp);
|
||||
} else {
|
||||
/* For larger data types, we have to either emit dispatch_width many
|
||||
* MOVs or else fall back to doing indirects.
|
||||
*/
|
||||
brw_reg idx = bld.vgrf(BRW_TYPE_W);
|
||||
bld.XOR(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(0x2));
|
||||
bld.emit(SHADER_OPCODE_SHUFFLE, retype(dest, value.type), value, idx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_quad_swap_horizontal:
|
||||
case nir_intrinsic_quad_swap_vertical:
|
||||
case nir_intrinsic_quad_swap_diagonal: {
|
||||
const brw_reg value = get_nir_src(ntb, instr->src[0]);
|
||||
if (nir_src_bit_size(instr->src[0]) == 32) {
|
||||
/* For 32-bit, we can use a SIMD4x2 instruction to do this easily */
|
||||
const brw_reg tmp = bld.vgrf(value.type);
|
||||
const fs_builder ubld = bld.exec_all();
|
||||
ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value,
|
||||
brw_imm_ud(BRW_SWIZZLE4(3,2,1,0)));
|
||||
bld.MOV(retype(dest, value.type), tmp);
|
||||
} else {
|
||||
/* For larger data types, we have to either emit dispatch_width many
|
||||
* MOVs or else fall back to doing indirects.
|
||||
*/
|
||||
brw_reg idx = bld.vgrf(BRW_TYPE_W);
|
||||
bld.XOR(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(0x3));
|
||||
bld.emit(SHADER_OPCODE_SHUFFLE, retype(dest, value.type), value, idx);
|
||||
|
||||
enum brw_swap_direction dir;
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_quad_swap_horizontal: dir = BRW_SWAP_HORIZONTAL; break;
|
||||
case nir_intrinsic_quad_swap_vertical: dir = BRW_SWAP_VERTICAL; break;
|
||||
case nir_intrinsic_quad_swap_diagonal: dir = BRW_SWAP_DIAGONAL; break;
|
||||
default: unreachable("invalid quad swap");
|
||||
}
|
||||
|
||||
bld.emit(SHADER_OPCODE_QUAD_SWAP, retype(dest, value.type),
|
||||
value, brw_imm_ud(dir));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -238,6 +238,7 @@ brw_validate_instruction_phase(const fs_visitor &s, fs_inst *inst)
|
|||
case SHADER_OPCODE_VOTE_ALL:
|
||||
case SHADER_OPCODE_VOTE_EQUAL:
|
||||
case SHADER_OPCODE_BALLOT:
|
||||
case SHADER_OPCODE_QUAD_SWAP:
|
||||
invalid_from = BRW_SHADER_PHASE_AFTER_EARLY_LOWERING;
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -539,6 +539,62 @@ brw_lower_ballot(fs_visitor &s, bblock_t *block, fs_inst *inst)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Lower a SHADER_OPCODE_QUAD_SWAP instruction into simpler instructions.
 *
 * \param s      Visitor the instruction belongs to; its dispatch_width sizes
 *               the horizontal-swap copies.
 * \param block  Basic block containing \p inst.
 * \param inst   The QUAD_SWAP instruction. Source 0 is the value, source 1 is
 *               an immediate holding a brw_swap_direction.
 *
 * Depending on the direction and the type size this emits either a pair of
 * strided MOVs, a SHADER_OPCODE_QUAD_SWIZZLE, or a SHADER_OPCODE_SHUFFLE, all
 * through a builder positioned at \p inst, and then removes \p inst from the
 * block.
 *
 * \return Always true (the instruction is always replaced).
 */
static bool
|
||||
brw_lower_quad_swap(fs_visitor &s, bblock_t *block, fs_inst *inst)
|
||||
{
|
||||
const fs_builder bld(&s, block, inst);
|
||||
|
||||
/* Temporaries below are allocated with the source type and copied to dst,
 * so the two types are required to agree.
 */
assert(inst->dst.type == inst->src[0].type);
|
||||
brw_reg dst = inst->dst;
|
||||
brw_reg value = inst->src[0];
|
||||
|
||||
/* The swap direction is carried as an immediate in source 1. */
assert(inst->src[1].file == IMM);
|
||||
enum brw_swap_direction dir = (enum brw_swap_direction)inst->src[1].ud;
|
||||
|
||||
switch (dir) {
|
||||
case BRW_SWAP_HORIZONTAL: {
|
||||
const brw_reg tmp = bld.vgrf(value.type);
|
||||
|
||||
/* Half-width exec_all builder: each MOV below covers every other channel,
 * so two of them together cover the whole dispatch width.
 */
const fs_builder ubld = bld.exec_all().group(s.dispatch_width / 2, 0);
|
||||
|
||||
/* Stride-2 views of the value and the temporary: "left" starts at channel
 * 0 and "right" starts one channel later.
 */
const brw_reg src_left = horiz_stride(value, 2);
|
||||
const brw_reg src_right = horiz_stride(horiz_offset(value, 1), 2);
|
||||
const brw_reg tmp_left = horiz_stride(tmp, 2);
|
||||
const brw_reg tmp_right = horiz_stride(horiz_offset(tmp, 1), 2);
|
||||
|
||||
/* Cross-copy: left slots receive the right values and vice versa. */
ubld.MOV(tmp_left, src_right);
|
||||
ubld.MOV(tmp_right, src_left);
|
||||
|
||||
/* The final copy uses the instruction's own builder rather than the
 * exec_all one. NOTE(review): presumably so the write to dst honors the
 * original instruction's execution controls -- confirm.
 */
bld.MOV(retype(dst, value.type), tmp);
|
||||
break;
|
||||
}
|
||||
case BRW_SWAP_VERTICAL:
|
||||
case BRW_SWAP_DIAGONAL: {
|
||||
if (brw_type_size_bits(value.type) == 32) {
|
||||
/* For 32-bit, we can use a SIMD4x2 instruction to do this easily */
|
||||
/* The swizzles agree with the XOR masks of the else branch: for
 * i = 0..3, i ^ 2 gives (2,3,0,1) and i ^ 3 gives (3,2,1,0).
 */
const unsigned swizzle = dir == BRW_SWAP_VERTICAL ? BRW_SWIZZLE4(2,3,0,1)
|
||||
: BRW_SWIZZLE4(3,2,1,0);
|
||||
const brw_reg tmp = bld.vgrf(value.type);
|
||||
const fs_builder ubld = bld.exec_all();
|
||||
ubld.emit(SHADER_OPCODE_QUAD_SWIZZLE, tmp, value, brw_imm_ud(swizzle));
|
||||
bld.MOV(dst, tmp);
|
||||
} else {
|
||||
/* For larger data types, we have to either emit dispatch_width many
|
||||
* MOVs or else fall back to doing indirects.
|
||||
*/
|
||||
/* Note that this branch is taken for every type size other than 32 bits,
 * not only for larger ones. The shuffle index is the subgroup invocation
 * with bit 1 (vertical) or bits 0-1 (diagonal) flipped; presumably each
 * channel then reads the value of the channel named by its index.
 */
const unsigned xor_mask = dir == BRW_SWAP_VERTICAL ? 0x2 : 0x3;
|
||||
brw_reg idx = bld.vgrf(BRW_TYPE_W);
|
||||
bld.XOR(idx, bld.LOAD_SUBGROUP_INVOCATION(), brw_imm_w(xor_mask));
|
||||
bld.emit(SHADER_OPCODE_SHUFFLE, dst, value, idx);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* The replacement sequence has been emitted; drop the original opcode. */
inst->remove(block);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_fs_lower_subgroup_ops(fs_visitor &s)
|
||||
{
|
||||
|
|
@ -565,6 +621,10 @@ brw_fs_lower_subgroup_ops(fs_visitor &s)
|
|||
progress |= brw_lower_ballot(s, block, inst);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_QUAD_SWAP:
|
||||
progress |= brw_lower_quad_swap(s, block, inst);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Nothing to do. */
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -305,6 +305,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
|||
return "vote_equal";
|
||||
case SHADER_OPCODE_BALLOT:
|
||||
return "ballot";
|
||||
case SHADER_OPCODE_QUAD_SWAP:
|
||||
return "quad_swap";
|
||||
}
|
||||
|
||||
unreachable("not reached");
|
||||
|
|
@ -611,6 +613,19 @@ brw_print_instruction_to_file(const fs_visitor &s, const fs_inst *inst, FILE *fi
|
|||
|
||||
fprintf(file, ":%s", brw_reg_type_to_letters(inst->src[i].type));
|
||||
}
|
||||
|
||||
if (inst->opcode == SHADER_OPCODE_QUAD_SWAP && i == 1) {
|
||||
assert(inst->src[i].file == IMM);
|
||||
const char *name = NULL;
|
||||
switch (inst->src[i].ud) {
|
||||
case BRW_SWAP_HORIZONTAL: name = "horizontal"; break;
|
||||
case BRW_SWAP_VERTICAL: name = "vertical"; break;
|
||||
case BRW_SWAP_DIAGONAL: name = "diagonal"; break;
|
||||
default:
|
||||
unreachable("invalid brw_swap_direction");
|
||||
}
|
||||
fprintf(file, " (%s)", name);
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(file, " ");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue