diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 4f666ccb4c0..133f5526f74 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -5920,6 +5920,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } } +static fs_reg +expand_to_32bit(const fs_builder &bld, const fs_reg &src) +{ + if (type_sz(src.type) == 2) { + fs_reg src32 = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(src32, retype(src, BRW_REGISTER_TYPE_UW)); + return src32; + } else { + return src; + } +} + void fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, int op, nir_intrinsic_instr *instr) @@ -5930,7 +5942,8 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, * descriptors provided for Qword atomic ops except for A64 messages. */ assert(nir_dest_bit_size(instr->dest) == 32 || - (nir_dest_bit_size(instr->dest) == 64 && devinfo->has_lsc)); + (nir_dest_bit_size(instr->dest) == 64 && devinfo->has_lsc) || + (nir_dest_bit_size(instr->dest) == 16 && devinfo->has_lsc)); fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) @@ -5945,11 +5958,14 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, fs_reg data; if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) - data = get_nir_src(instr->src[2]); + data = expand_to_32bit(bld, get_nir_src(instr->src[2])); if (op == BRW_AOP_CMPWR) { fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { data, get_nir_src(instr->src[3]) }; + fs_reg sources[2] = { + data, + expand_to_32bit(bld, get_nir_src(instr->src[3])) + }; bld.LOAD_PAYLOAD(tmp, sources, 2, 0); data = tmp; } @@ -5957,8 +5973,25 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, /* Emit the actual atomic operation */ - bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, - dest, srcs, SURFACE_LOGICAL_NUM_SRCS); + switch (nir_dest_bit_size(instr->dest)) { + case 16: { + fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, + retype(dest32, dest.type), + srcs, SURFACE_LOGICAL_NUM_SRCS); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), + retype(dest32, BRW_REGISTER_TYPE_UD)); + break; + } + + case 32: + case 64: + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, + dest, srcs, SURFACE_LOGICAL_NUM_SRCS); + break; + default: + unreachable("Unsupported bit size"); + } } void @@ -5976,19 +6009,38 @@ fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op); srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); - fs_reg data = get_nir_src(instr->src[2]); + fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[2])); if (op == BRW_AOP_FCMPWR) { fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { data, get_nir_src(instr->src[3]) }; + fs_reg sources[2] = { + data, + expand_to_32bit(bld, get_nir_src(instr->src[3])) + }; bld.LOAD_PAYLOAD(tmp, sources, 2, 0); data = tmp; } srcs[SURFACE_LOGICAL_SRC_DATA] = data; /* Emit the actual atomic operation */ + switch (nir_dest_bit_size(instr->dest)) { + case 16: { + fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL, + retype(dest32, dest.type), + srcs, SURFACE_LOGICAL_NUM_SRCS); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), + retype(dest32, BRW_REGISTER_TYPE_UD)); + break; + } - bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL, - dest, srcs, SURFACE_LOGICAL_NUM_SRCS); + case 32: + case 64: + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL, + dest, srcs, SURFACE_LOGICAL_NUM_SRCS); + break; + default: + unreachable("Unsupported bit size"); + } } void @@ -6007,10 +6059,13 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, fs_reg data; if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) - data = get_nir_src(instr->src[1]); + data = expand_to_32bit(bld, get_nir_src(instr->src[1])); if (op == BRW_AOP_CMPWR) { fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { data, get_nir_src(instr->src[2]) }; + fs_reg sources[2] = { + expand_to_32bit(bld, data), + expand_to_32bit(bld, get_nir_src(instr->src[2])) + }; bld.LOAD_PAYLOAD(tmp, sources, 2, 0); data = tmp; } @@ -6030,8 +6085,25 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, /* Emit the actual atomic operation operation */ - bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, - dest, srcs, SURFACE_LOGICAL_NUM_SRCS); + switch (nir_dest_bit_size(instr->dest)) { + case 16: { + fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, + retype(dest32, dest.type), + srcs, SURFACE_LOGICAL_NUM_SRCS); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), + retype(dest32, BRW_REGISTER_TYPE_UD)); + break; + } + + case 32: + case 64: + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, + dest, srcs, SURFACE_LOGICAL_NUM_SRCS); + break; + default: + unreachable("Unsupported bit size"); + } } void @@ -6048,10 +6120,13 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op); srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); - fs_reg data = get_nir_src(instr->src[1]); + fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[1])); if (op == BRW_AOP_FCMPWR) { fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { data, get_nir_src(instr->src[2]) }; + fs_reg sources[2] = { + data, + expand_to_32bit(bld, get_nir_src(instr->src[2])) + }; bld.LOAD_PAYLOAD(tmp, sources, 2, 0); data = tmp; } @@ -6071,20 +6146,26 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld, /* Emit the actual atomic operation operation */ - bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL, - dest, srcs, SURFACE_LOGICAL_NUM_SRCS); -} + switch (nir_dest_bit_size(instr->dest)) { + case 16: { + fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL, + retype(dest32, dest.type), + srcs, SURFACE_LOGICAL_NUM_SRCS); + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), + retype(dest32, BRW_REGISTER_TYPE_UD)); + break; + } -static fs_reg -expand_to_32bit(const fs_builder &bld, const fs_reg &src) -{ - if (type_sz(src.type) == 2) { - fs_reg src32 = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.MOV(src32, retype(src, BRW_REGISTER_TYPE_UW)); - return src32; - } else { - return src; + case 32: + case 64: + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL, + dest, srcs, SURFACE_LOGICAL_NUM_SRCS); + break; + default: + unreachable("Unsupported bit size"); } + } void @@ -6120,7 +6201,8 @@ fs_visitor::nir_emit_global_atomic(const fs_builder &bld, switch (nir_dest_bit_size(instr->dest)) { case 16: { fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL, dest32, + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL, + retype(dest32, dest.type), srcs, A64_LOGICAL_NUM_SRCS); bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32); break; @@ -6169,7 +6251,8 @@ fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld, switch (nir_dest_bit_size(instr->dest)) { case 16: { fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL, dest32, + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL, + retype(dest32, dest.type), srcs, A64_LOGICAL_NUM_SRCS); bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32); break; diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 4972a55e649..6d49124a6f6 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1695,6 +1695,7 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS); const unsigned src_comps = inst->components_read(SURFACE_LOGICAL_SRC_DATA); const unsigned src_sz = type_sz(src.type); + const unsigned dst_sz = type_sz(inst->dst.type); const bool has_side_effects = inst->has_side_effects(); @@ -1758,10 +1759,11 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) inst->opcode == SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL ? brw_atomic_op_to_lsc_fatomic_op(arg.ud) : brw_atomic_op_to_lsc_atomic_op(arg.ud); + inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, 1 /* num_coordinates */, - lsc_bits_to_data_size(src_sz * 8), + lsc_bits_to_data_size(dst_sz * 8), 1 /* num_channels */, false /* transpose */, LSC_CACHE_STORE_L1UC_L3WB, @@ -2032,6 +2034,7 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) const fs_reg &addr = inst->src[A64_LOGICAL_ADDRESS]; const fs_reg &src = inst->src[A64_LOGICAL_SRC]; const unsigned src_sz = type_sz(src.type); + const unsigned dst_sz = type_sz(inst->dst.type); const unsigned src_comps = inst->components_read(1); assert(inst->src[A64_LOGICAL_ARG].file == IMM); @@ -2102,7 +2105,7 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, - lsc_bits_to_data_size(src_sz * 8), + lsc_bits_to_data_size(dst_sz * 8), 1 /* num_channels */, false /* transpose */, LSC_CACHE_STORE_L1UC_L3WB,