diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index b6a1cbdc090..60553a4a3f8 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -452,6 +452,10 @@ static const char *const dp_dc1_msg_type_hsw[32] = { "DC untyped atomic float op", [GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP] = "DC A64 untyped atomic float op", + [GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP] = + "DC A64 untyped atomic half-integer op", + [GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP] = + "DC A64 untyped atomic half-float op", }; static const char *const aop[16] = { @@ -2067,6 +2071,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2: case HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2: case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP: + case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP: control(file, "atomic op", aop, msg_ctrl & 0xf, &space); break; case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ: @@ -2082,6 +2087,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, } case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP: + case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP: format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16); control(file, "atomic float op", aop_float, msg_ctrl & 0xf, &space); diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 81045e4c029..905d64a855e 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -894,9 +894,12 @@ brw_dp_a64_untyped_atomic_desc(const struct gen_device_info *devinfo, { assert(exec_size == 8); assert(devinfo->gen >= 8); - assert(bit_size == 32 || bit_size == 64); + assert(bit_size == 16 || bit_size == 32 || bit_size == 64); + assert(devinfo->gen >= 12 || bit_size >= 32); - const unsigned msg_type = GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP; + const unsigned msg_type = bit_size == 16 ? + GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP : + GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP; const unsigned msg_control = SET_BITS(atomic_op, 3, 0) | @@ -910,14 +913,19 @@ brw_dp_a64_untyped_atomic_desc(const struct gen_device_info *devinfo, static inline uint32_t brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info *devinfo, ASSERTED unsigned exec_size, + unsigned bit_size, unsigned atomic_op, bool response_expected) { assert(exec_size == 8); assert(devinfo->gen >= 9); + assert(bit_size == 16 || bit_size == 32); + assert(devinfo->gen >= 12 || bit_size == 32); assert(exec_size > 0); - const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP; + const unsigned msg_type = bit_size == 32 ? + GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP : + GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP; const unsigned msg_control = SET_BITS(atomic_op, 1, 0) | diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 8ac46a36ef2..629668d60b8 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -435,8 +435,10 @@ enum opcode { SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL, SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL, SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, + SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL, SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL, - SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL, + SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL, + SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL, SHADER_OPCODE_TYPED_ATOMIC_LOGICAL, SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL, @@ -1439,12 +1441,14 @@ enum brw_message_target { #define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ 0x10 #define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ 0x11 #define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP 0x12 +#define GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP 0x13 #define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ 0x14 #define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE 0x15 #define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE 0x19 #define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE 0x1a #define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b #define GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP 0x1d +#define GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP 0x1e /* GEN9 */ #define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12 diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 349acec7594..eecbd5bcec8 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -859,6 +859,7 @@ fs_inst::components_read(unsigned i) const return i == 1 ? src[2].ud : 1; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: assert(src[2].file == IMM); if (i == 1) { @@ -878,7 +879,8 @@ fs_inst::components_read(unsigned i) const return 1; } - case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: assert(src[2].file == IMM); if (i == 1) { /* Data source */ @@ -5969,15 +5971,28 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst) !inst->dst.is_null()); break; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: + desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 16, + arg, /* atomic_op */ + !inst->dst.is_null()); + break; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 64, arg, /* atomic_op */ !inst->dst.is_null()); break; - - case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size, + 16, /* bit_size */ + arg, /* atomic_op */ + !inst->dst.is_null()); + break; + + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: + desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size, + 32, /* bit_size */ arg, /* atomic_op */ !inst->dst.is_null()); break; @@ -6354,8 +6369,10 @@ fs_visitor::lower_logical_sends() case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: - case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: lower_a64_logical_send(ibld, inst); break; @@ -6968,8 +6985,10 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, return inst->exec_size; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: - case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: return 8; case SHADER_OPCODE_URB_READ_SIMD8: diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index f3f59006ad0..5ea4c93e3a1 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -5686,6 +5686,18 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld, dest, srcs, SURFACE_LOGICAL_NUM_SRCS); } +static fs_reg +expand_to_32bit(const fs_builder &bld, const fs_reg &src) +{ + if (type_sz(src.type) == 2) { + fs_reg src32 = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(src32, retype(src, BRW_REGISTER_TYPE_UW)); + return src32; + } else { + return src; + } +} + void fs_visitor::nir_emit_global_atomic(const fs_builder &bld, int op, nir_intrinsic_instr *instr) @@ -5698,22 +5710,36 @@ fs_visitor::nir_emit_global_atomic(const fs_builder &bld, fs_reg data; if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) - data = get_nir_src(instr->src[1]); + data = expand_to_32bit(bld, get_nir_src(instr->src[1])); if (op == BRW_AOP_CMPWR) { fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { data, get_nir_src(instr->src[2]) }; + fs_reg sources[2] = { + data, + expand_to_32bit(bld, get_nir_src(instr->src[2])) + }; bld.LOAD_PAYLOAD(tmp, sources, 2, 0); data = tmp; } - if (nir_dest_bit_size(instr->dest) == 64) { - bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL, - dest, addr, data, brw_imm_ud(op)); - } else { - assert(nir_dest_bit_size(instr->dest) == 32); + switch (nir_dest_bit_size(instr->dest)) { + case 16: { + fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL, + dest32, addr, data, brw_imm_ud(op)); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32); + break; + } + case 32: bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, dest, addr, data, brw_imm_ud(op)); + break; + case 64: + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL, + dest, addr, data, brw_imm_ud(op)); + break; + default: + unreachable("Unsupported bit size"); } } @@ -5727,17 +5753,33 @@ fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld, fs_reg addr = get_nir_src(instr->src[0]); assert(op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC); - fs_reg data = get_nir_src(instr->src[1]); + fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[1])); if (op == BRW_AOP_FCMPWR) { fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { data, get_nir_src(instr->src[2]) }; + fs_reg sources[2] = { + data, + expand_to_32bit(bld, get_nir_src(instr->src[2])) + }; bld.LOAD_PAYLOAD(tmp, sources, 2, 0); data = tmp; } - bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL, - dest, addr, data, brw_imm_ud(op)); + switch (nir_dest_bit_size(instr->dest)) { + case 16: { + fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL, + dest32, addr, data, brw_imm_ud(op)); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32); + break; + } + case 32: + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL, + dest, addr, data, brw_imm_ud(op)); + break; + default: + unreachable("Unsupported bit size"); + } } void diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index d848f0b15b2..69f85d88f0a 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -515,6 +515,8 @@ schedule_node::set_latency_gen7(bool is_haswell) case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP: case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP: + case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP: + case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP: /* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */ latency = 14000; break; diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index cba0b589dde..9a6aad85917 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -323,10 +323,14 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "a64_byte_scattered_write_logical"; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: return "a64_untyped_atomic_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: + return "a64_untyped_atomic_int16_logical"; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: return "a64_untyped_atomic_int64_logical"; - case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: - return "a64_untyped_atomic_float_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: + return "a64_untyped_atomic_float16_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: + return "a64_untyped_atomic_float32_logical"; case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: return "typed_atomic_logical"; case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: @@ -1101,8 +1105,10 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: - case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: