diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 606f457a785..6fee10058cd 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -8165,32 +8165,9 @@ fs_nir_emit_surface_atomic(nir_to_brw_state &ntb, const fs_builder &bld, } srcs[SURFACE_LOGICAL_SRC_DATA] = data; - fs_inst *inst; - unsigned size_written = 0; /* Emit the actual atomic operation */ - switch (instr->def.bit_size) { - case 16: { - brw_reg dest32 = bld.vgrf(BRW_TYPE_UD); - inst = bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, - retype(dest32, dest.type), - srcs, SURFACE_LOGICAL_NUM_SRCS); - size_written = dest32.component_size(inst->exec_size); - bld.MOV(retype(dest, BRW_TYPE_UW), dest32); - break; - } - - case 32: - case 64: - inst = bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, - dest, srcs, SURFACE_LOGICAL_NUM_SRCS); - size_written = dest.component_size(inst->exec_size); - break; - default: - unreachable("Unsupported bit size"); - } - - assert(size_written); - inst->size_written = size_written * instr->def.num_components; + bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, dest, srcs, + SURFACE_LOGICAL_NUM_SRCS); } static void @@ -8224,30 +8201,8 @@ fs_nir_emit_global_atomic(nir_to_brw_state &ntb, const fs_builder &bld, srcs[A64_LOGICAL_ARG] = brw_imm_ud(op); srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(0); - fs_inst *inst; - unsigned size_written = 0; - switch (instr->def.bit_size) { - case 16: { - brw_reg dest32 = bld.vgrf(BRW_TYPE_UD); - inst = bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, - retype(dest32, dest.type), - srcs, A64_LOGICAL_NUM_SRCS); - size_written = dest32.component_size(inst->exec_size); - bld.MOV(retype(dest, BRW_TYPE_UW), dest32); - break; - } - case 32: - case 64: - inst = bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, dest, - srcs, A64_LOGICAL_NUM_SRCS); - size_written = dest.component_size(inst->exec_size); - break; - default: - unreachable("Unsupported bit size"); - } - - assert(size_written); 
- inst->size_written = size_written * instr->def.num_components; + bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL, dest, + srcs, A64_LOGICAL_NUM_SRCS); } static void diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 5819c4cf282..b6e272c03ec 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1653,7 +1653,8 @@ lsc_bits_to_data_size(unsigned bit_size) } static void -lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) +lower_lsc_surface_logical_send(bblock_t *block, const fs_builder &bld, + fs_inst *inst) { const brw_compiler *compiler = bld.shader->compiler; const intel_device_info *devinfo = bld.shader->devinfo; @@ -1810,6 +1811,20 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS && compiler->extended_bindless_surface_offset; + /* Messages with destination datatypes narrower than a dword use a + * D*32 LSC data size; update the destination to use a temporary of + * the raw (UD) return payload datatype. 
+ */ + if (dst_sz < 4) { + assert(lsc_data_size_bytes(lsc_bits_to_data_size(dst_sz * 8)) == 4); + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + const brw_reg dest32 = bld.vgrf(BRW_TYPE_UD); + const brw_reg_type t = brw_int_type(dst_sz, false); + bld.at(block, inst->next).MOV(retype(inst->dst, t), dest32); + inst->dst = dest32; + inst->size_written = inst->dst.component_size(inst->exec_size); + } + inst->resize_sources(4); if (non_bindless) { @@ -2032,7 +2047,7 @@ emit_fragment_mask(const fs_builder &bld, fs_inst *inst) } static void -lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) +lower_lsc_a64_logical_send(bblock_t *block, const fs_builder &bld, fs_inst *inst) { const intel_device_info *devinfo = bld.shader->devinfo; @@ -2144,6 +2159,20 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) inst->send_has_side_effects = has_side_effects; inst->send_is_volatile = !has_side_effects; + /* Messages with destination datatypes narrower than a dword use a + * D*32 LSC data size; update the destination to use a temporary of + * the raw (UD) return payload datatype. 
+ */ + if (dst_sz < 4) { + assert(lsc_data_size_bytes(lsc_bits_to_data_size(dst_sz * 8)) == 4); + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + const brw_reg dest32 = bld.vgrf(BRW_TYPE_UD); + const brw_reg_type t = brw_int_type(dst_sz, false); + bld.at(block, inst->next).MOV(retype(inst->dst, t), dest32); + inst->dst = dest32; + inst->size_written = inst->dst.component_size(inst->exec_size); + } + /* Set up SFID and descriptors */ inst->sfid = GFX12_SFID_UGM; inst->resize_sources(4); @@ -2805,7 +2834,7 @@ brw_fs_lower_logical_sends(fs_visitor &s) case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: if (devinfo->has_lsc) - lower_lsc_surface_logical_send(ibld, inst); + lower_lsc_surface_logical_send(block, ibld, inst); else lower_surface_logical_send(ibld, inst); break; @@ -2814,7 +2843,7 @@ brw_fs_lower_logical_sends(fs_visitor &s) case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: devinfo->ver >= 20 && devinfo->has_lsc ? - lower_lsc_surface_logical_send(ibld, inst) : + lower_lsc_surface_logical_send(block, ibld, inst) : lower_surface_logical_send(ibld, inst); break; @@ -2836,7 +2865,7 @@ brw_fs_lower_logical_sends(fs_visitor &s) case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: if (devinfo->has_lsc) { - lower_lsc_a64_logical_send(ibld, inst); + lower_lsc_a64_logical_send(block, ibld, inst); break; } lower_a64_logical_send(ibld, inst);