diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 7168ffe60e7..772eeac15c6 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -671,7 +671,7 @@ enum get_buffer_size_srcs { GET_BUFFER_SIZE_SRCS }; -enum memory_logical_mode { +enum ENUM_PACKED memory_logical_mode { MEMORY_MODE_TYPED, MEMORY_MODE_UNTYPED, MEMORY_MODE_SHARED_LOCAL, @@ -680,17 +680,8 @@ enum memory_logical_mode { }; enum memory_logical_srcs { - /** enum lsc_opcode (as UD immediate) */ - MEMORY_LOGICAL_OPCODE, - - /** enum memory_logical_mode (as UD immediate) */ - MEMORY_LOGICAL_MODE, - - /** enum lsc_addr_surface_type (as UD immediate) */ - MEMORY_LOGICAL_BINDING_TYPE, - /** - * Where to find the surface state. Depends on BINDING_TYPE above: + * Where to find the surface state. Depends on brw_mem_inst::binding_type: * * - SS: pointer to surface state (relative to surface base address) * - BSS: pointer to surface state (relative to bindless surface base) @@ -702,24 +693,6 @@ enum memory_logical_srcs { /** Coordinate/address/offset for where to access memory */ MEMORY_LOGICAL_ADDRESS, - /** Xe2+: offset for where to access memory (as UD immediate) */ - MEMORY_LOGICAL_ADDRESS_OFFSET, - - /** Dimensionality of the "address" source (as UD immediate) */ - MEMORY_LOGICAL_COORD_COMPONENTS, - - /** Required alignment of address in bytes; 0 for natural alignment */ - MEMORY_LOGICAL_ALIGNMENT, - - /** Bit-size in the form of enum lsc_data_size (as UD immediate) */ - MEMORY_LOGICAL_DATA_SIZE, - - /** Number of vector components (as UD immediate) */ - MEMORY_LOGICAL_COMPONENTS, - - /** memory_flags bitfield (as UD immediate) */ - MEMORY_LOGICAL_FLAGS, - /** Data to write for stores or the first operand for atomics */ MEMORY_LOGICAL_DATA0, @@ -1583,7 +1556,7 @@ enum ENUM_PACKED brw_rnd_mode { * one or two messages with different bits to control things like address * size, how much data is read/written, etc. 
*/ -enum lsc_opcode { +enum ENUM_PACKED lsc_opcode { LSC_OP_LOAD = 0, LSC_OP_LOAD_CMASK = 2, LSC_OP_STORE = 4, diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index 98a49cbe059..05df4de7e4f 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -33,6 +33,8 @@ #include "util/bitscan.h" #include "compiler/glsl_types.h" +#include <optional> + struct brw_bind_info { bool valid; bool bindless; @@ -4617,42 +4619,42 @@ can_use_instruction_offset(enum lsc_addr_surface_type binding_type, int32_t offs return offset >= u_intN_min(max_bits) && offset <= u_intN_max(max_bits); } -static void -set_memory_address(nir_to_brw_state &ntb, - const brw_builder &bld, - nir_intrinsic_instr *instr, - brw_reg *srcs) +static brw_reg +memory_address(nir_to_brw_state &ntb, + const brw_builder &bld, + nir_intrinsic_instr *instr, + enum lsc_addr_surface_type binding_type, + int32_t *address_offset) { const intel_device_info *devinfo = ntb.devinfo; const nir_src *nir_src_offset = nir_get_io_offset_src(instr); const brw_reg src_offset = get_nir_src_imm(ntb, *nir_src_offset); - const enum lsc_addr_surface_type binding_type = - (enum lsc_addr_surface_type) srcs[MEMORY_LOGICAL_BINDING_TYPE].ud; const brw_builder ubld = src_offset.is_scalar ? bld.scalar_group() : bld; + brw_reg address; if (devinfo->ver < 20 || (!nir_intrinsic_has_base(instr) && !nir_src_is_const(*nir_src_offset))) { - srcs[MEMORY_LOGICAL_ADDRESS] = + address = nir_intrinsic_has_base(instr) ? 
ubld.ADD(src_offset, brw_imm_int(src_offset.type, nir_intrinsic_base(instr))) : src_offset; - srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0); + *address_offset = 0; } else if (!nir_intrinsic_has_base(instr) && nir_src_is_const(*nir_src_offset)) { const int32_t offset = nir_src_as_int(*nir_src_offset); if (can_use_instruction_offset(binding_type, offset)) { - srcs[MEMORY_LOGICAL_ADDRESS] = brw_imm_ud(0); - srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(offset); + address = brw_imm_ud(0); + *address_offset = offset; } else { - srcs[MEMORY_LOGICAL_ADDRESS] = src_offset; - srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0); + address = src_offset; + *address_offset = 0; } } else { assert(nir_intrinsic_has_base(instr)); const int32_t offset = nir_intrinsic_base(instr); assert(can_use_instruction_offset(binding_type, offset)); - srcs[MEMORY_LOGICAL_ADDRESS] = src_offset; - srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(offset); + address = src_offset; + *address_offset = offset; } /* If nir_src is_scalar, the MEMORY_LOGICAL_ADDRESS will be allocated at @@ -4661,7 +4663,9 @@ set_memory_address(nir_to_brw_state &ntb, * properly also ensures that emit_uniformize (below) will handle the value * as scalar_group() size instead of full dispatch width. 
*/ - srcs[MEMORY_LOGICAL_ADDRESS].is_scalar = src_offset.is_scalar; + address.is_scalar = src_offset.is_scalar; + + return address; } static unsigned @@ -4757,22 +4761,20 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb, cs_prog_data->uses_num_work_groups = true; brw_reg srcs[MEMORY_LOGICAL_NUM_SRCS]; - srcs[MEMORY_LOGICAL_OPCODE] = brw_imm_ud(LSC_OP_LOAD); - srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_UNTYPED); - srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BTI); srcs[MEMORY_LOGICAL_BINDING] = brw_imm_ud(0); srcs[MEMORY_LOGICAL_ADDRESS] = brw_imm_ud(0); - srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud(1); - srcs[MEMORY_LOGICAL_ALIGNMENT] = brw_imm_ud(4); - srcs[MEMORY_LOGICAL_DATA_SIZE] = brw_imm_ud(LSC_DATA_SIZE_D32); - srcs[MEMORY_LOGICAL_COMPONENTS] = brw_imm_ud(3); - srcs[MEMORY_LOGICAL_FLAGS] = brw_imm_ud(0); - srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0); - brw_inst *inst = + brw_mem_inst *mem = bld.emit(SHADER_OPCODE_MEMORY_LOAD_LOGICAL, - dest, srcs, MEMORY_LOGICAL_NUM_SRCS); - inst->size_written = 3 * s.dispatch_width * 4; + dest, srcs, MEMORY_LOGICAL_NUM_SRCS)->as_mem(); + mem->size_written = 3 * s.dispatch_width * 4; + mem->lsc_op = LSC_OP_LOAD; + mem->mode = MEMORY_MODE_UNTYPED; + mem->binding_type = LSC_ADDR_SURFTYPE_BTI; + mem->data_size = LSC_DATA_SIZE_D32; + mem->coord_components = 1; + mem->components = 3; + mem->alignment = 4; break; } @@ -7090,30 +7092,26 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, (nir_intrinsic_access(instr) & ACCESS_COHERENT); const unsigned align = nir_intrinsic_has_align(instr) ? nir_intrinsic_align(instr) : 0; - const unsigned logical_flags = + uint8_t flags = (include_helpers ? MEMORY_FLAG_INCLUDE_HELPERS : 0) | (volatile_access ? MEMORY_FLAG_VOLATILE_ACCESS : 0) | (coherent_access ? 
MEMORY_FLAG_COHERENT_ACCESS : 0); bool no_mask_handle = false; int data_src = -1; - srcs[MEMORY_LOGICAL_OPCODE] = brw_imm_ud(op); - /* BINDING_TYPE, BINDING, and ADDRESS are handled in the switch */ - srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud(1); - srcs[MEMORY_LOGICAL_ALIGNMENT] = brw_imm_ud(align); - /* DATA_SIZE and CHANNELS are handled below the switch */ - srcs[MEMORY_LOGICAL_FLAGS] = brw_imm_ud(logical_flags); - /* DATA0 and DATA1 are handled below */ + uint8_t coord_components = 1; - /* Set the default address offset to 0 */ - srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0); + int32_t address_offset = 0; + + std::optional<enum memory_logical_mode> mode; + std::optional<enum lsc_addr_surface_type> binding_type; switch (instr->intrinsic) { case nir_intrinsic_bindless_image_load: case nir_intrinsic_bindless_image_store: case nir_intrinsic_bindless_image_atomic: case nir_intrinsic_bindless_image_atomic_swap: - srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BSS); + binding_type = LSC_ADDR_SURFTYPE_BSS; FALLTHROUGH; case nir_intrinsic_image_load: case nir_intrinsic_image_store: @@ -7127,22 +7125,22 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, * message. For TGM it will be 4 (U, V, R, SAMPLE_INDEX)." * */ - srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud( + coord_components = (devinfo->ver >= 30 && nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_MS) ? 
4 : - nir_image_intrinsic_coord_components(instr)); + nir_image_intrinsic_coord_components(instr); /* MSAA image atomic accesses not supported, must be lowered to UGM */ assert((instr->intrinsic != nir_intrinsic_bindless_image_atomic && instr->intrinsic != nir_intrinsic_bindless_image_atomic_swap) || nir_intrinsic_image_dim(instr) != GLSL_SAMPLER_DIM_MS); - srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_TYPED); + mode = MEMORY_MODE_TYPED; srcs[MEMORY_LOGICAL_BINDING] = get_nir_image_intrinsic_image(ntb, bld, instr); - if (srcs[MEMORY_LOGICAL_BINDING_TYPE].file == BAD_FILE) - srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BTI); + if (!binding_type.has_value()) + binding_type = LSC_ADDR_SURFTYPE_BTI; srcs[MEMORY_LOGICAL_ADDRESS] = get_nir_src(ntb, instr->src[1], 0); @@ -7150,7 +7148,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, break; case nir_intrinsic_load_ubo_uniform_block_intel: - srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_CONSTANT); + mode = MEMORY_MODE_CONSTANT; FALLTHROUGH; case nir_intrinsic_load_ssbo: case nir_intrinsic_load_ssbo_intel: @@ -7161,14 +7159,14 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, case nir_intrinsic_load_ssbo_block_intel: case nir_intrinsic_store_ssbo_block_intel: case nir_intrinsic_load_ssbo_uniform_block_intel: - if (srcs[MEMORY_LOGICAL_MODE].file == BAD_FILE) - srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_UNTYPED); - srcs[MEMORY_LOGICAL_BINDING_TYPE] = - brw_imm_ud(get_nir_src_bindless(ntb, instr->src[is_store ? 1 : 0]) ? - LSC_ADDR_SURFTYPE_BSS : LSC_ADDR_SURFTYPE_BTI); + if (!mode.has_value()) + mode = MEMORY_MODE_UNTYPED; + binding_type = get_nir_src_bindless(ntb, instr->src[is_store ? 1 : 0]) ? 
+ LSC_ADDR_SURFTYPE_BSS : LSC_ADDR_SURFTYPE_BTI; srcs[MEMORY_LOGICAL_BINDING] = get_nir_buffer_intrinsic_index(ntb, bld, instr, &no_mask_handle); - set_memory_address(ntb, bld, instr, srcs); + srcs[MEMORY_LOGICAL_ADDRESS] = + memory_address(ntb, bld, instr, *binding_type, &address_offset); data_src = is_atomic ? 2 : 0; break; case nir_intrinsic_load_shared: @@ -7178,21 +7176,22 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, case nir_intrinsic_load_shared_block_intel: case nir_intrinsic_store_shared_block_intel: case nir_intrinsic_load_shared_uniform_block_intel: { - srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_SHARED_LOCAL); - srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_FLAT); - set_memory_address(ntb, bld, instr, srcs); + mode = MEMORY_MODE_SHARED_LOCAL; + binding_type = LSC_ADDR_SURFTYPE_FLAT; + srcs[MEMORY_LOGICAL_ADDRESS] = + memory_address(ntb, bld, instr, *binding_type, &address_offset); data_src = is_atomic ? 1 : 0; no_mask_handle = true; break; } case nir_intrinsic_load_scratch: case nir_intrinsic_store_scratch: { - srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_SCRATCH); + mode = MEMORY_MODE_SCRATCH; const nir_src &addr = instr->src[is_store ? 1 : 0]; if (devinfo->verx10 >= 125) { - srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_SS); + binding_type = LSC_ADDR_SURFTYPE_SS; const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0); brw_reg bind = ubld.AND(retype(brw_vec1_grf(0, 5), BRW_TYPE_UD), @@ -7214,8 +7213,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, /* load_scratch / store_scratch cannot be is_scalar yet. 
*/ assert(xbld.dispatch_width() == bld.dispatch_width()); - srcs[MEMORY_LOGICAL_BINDING_TYPE] = - brw_imm_ud(LSC_ADDR_SURFTYPE_FLAT); + binding_type = LSC_ADDR_SURFTYPE_FLAT; srcs[MEMORY_LOGICAL_ADDRESS] = swizzle_nir_scratch_addr(ntb, bld, addr, dword_aligned); } @@ -7237,9 +7235,10 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, case nir_intrinsic_global_atomic_swap: case nir_intrinsic_load_global_block_intel: case nir_intrinsic_store_global_block_intel: - srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_UNTYPED); - srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_FLAT); - set_memory_address(ntb, bld, instr, srcs); + mode = MEMORY_MODE_UNTYPED; + binding_type = LSC_ADDR_SURFTYPE_FLAT; + srcs[MEMORY_LOGICAL_ADDRESS] = + memory_address(ntb, bld, instr, *binding_type, &address_offset); data_src = is_atomic ? 1 : 0; no_mask_handle = srcs[MEMORY_LOGICAL_ADDRESS].is_scalar; break; @@ -7253,15 +7252,11 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, if (components == 0) components = instr->num_components; - srcs[MEMORY_LOGICAL_COMPONENTS] = brw_imm_ud(components); - const unsigned nir_bit_size = is_store ? 
instr->src[data_src].ssa->bit_size : instr->def.bit_size; - enum lsc_data_size data_size = lsc_bits_to_data_size(nir_bit_size); + const enum lsc_data_size data_size = lsc_bits_to_data_size(nir_bit_size); uint32_t data_bit_size = lsc_data_size_bytes(data_size) * 8; - srcs[MEMORY_LOGICAL_DATA_SIZE] = brw_imm_ud(data_size); - const brw_reg_type data_type = brw_type_with_size(BRW_TYPE_UD, data_bit_size); const brw_reg_type nir_data_type = @@ -7310,11 +7305,20 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, instr->intrinsic == nir_intrinsic_store_shared_block_intel || instr->intrinsic == nir_intrinsic_store_ssbo_block_intel; - brw_inst *inst; + brw_mem_inst *mem; if (!block) { - inst = xbld.emit(opcode, dest, srcs, MEMORY_LOGICAL_NUM_SRCS); - inst->size_written *= components; + mem = xbld.emit(opcode, dest, srcs, MEMORY_LOGICAL_NUM_SRCS)->as_mem(); + mem->size_written *= components; + mem->lsc_op = op; + mem->mode = *mode; + mem->binding_type = *binding_type; + mem->address_offset = address_offset; + mem->coord_components = coord_components; + mem->data_size = data_size; + mem->components = components; + mem->alignment = align; + mem->flags = flags; if (dest.file != BAD_FILE && data_bit_size > nir_bit_size) { /* Shrink e.g. 
D16U32 result back to D16 */ @@ -7326,8 +7330,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, } else { assert(nir_bit_size == 32); - srcs[MEMORY_LOGICAL_FLAGS] = - brw_imm_ud(MEMORY_FLAG_TRANSPOSE | srcs[MEMORY_LOGICAL_FLAGS].ud); + flags |= MEMORY_FLAG_TRANSPOSE; srcs[MEMORY_LOGICAL_ADDRESS] = bld.emit_uniformize(srcs[MEMORY_LOGICAL_ADDRESS]); @@ -7344,8 +7347,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, */ if (srcs[MEMORY_LOGICAL_ADDRESS].file == IMM && align >= data_bit_size / 8 && - (devinfo->has_lsc || - srcs[MEMORY_LOGICAL_MODE].ud != MEMORY_MODE_SHARED_LOCAL)) { + (devinfo->has_lsc || mode != MEMORY_MODE_SHARED_LOCAL)) { first_read_component = nir_def_first_component_read(&instr->def); unsigned last_component = nir_def_last_component_read(&instr->def); srcs[MEMORY_LOGICAL_ADDRESS].u64 += @@ -7368,8 +7370,6 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, block_comps = choose_block_size_dwords(devinfo, total - done); const unsigned block_bytes = block_comps * (nir_bit_size / 8); - srcs[MEMORY_LOGICAL_COMPONENTS] = brw_imm_ud(block_comps); - brw_reg dst_offset = is_store ? 
brw_reg() : retype(byte_offset(dest, done * 4), BRW_TYPE_UD); if (is_store) { @@ -7377,10 +7377,19 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb, retype(byte_offset(src, done * 4), BRW_TYPE_UD); } - inst = ubld.emit(opcode, dst_offset, srcs, MEMORY_LOGICAL_NUM_SRCS); - inst->has_no_mask_send_params = no_mask_handle; + mem = ubld.emit(opcode, dst_offset, srcs, MEMORY_LOGICAL_NUM_SRCS)->as_mem(); + mem->has_no_mask_send_params = no_mask_handle; if (is_load) - inst->size_written = block_bytes; + mem->size_written = block_bytes; + mem->lsc_op = op; + mem->mode = *mode; + mem->binding_type = *binding_type; + mem->address_offset = address_offset; + mem->coord_components = coord_components; + mem->data_size = data_size; + mem->components = block_comps; + mem->alignment = align; + mem->flags = flags; if (brw_type_size_bits(srcs[MEMORY_LOGICAL_ADDRESS].type) == 64) { increment_a64_address(ubld, srcs[MEMORY_LOGICAL_ADDRESS], diff --git a/src/intel/compiler/brw_inst.cpp b/src/intel/compiler/brw_inst.cpp index e481a21bd05..7b76e5af02e 100644 --- a/src/intel/compiler/brw_inst.cpp +++ b/src/intel/compiler/brw_inst.cpp @@ -15,6 +15,7 @@ static inline unsigned brw_inst_kind_size(brw_inst_kind kind) { STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_tex_inst)); + STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_mem_inst)); /* TODO: Temporarily here to ensure all instructions can be converted to * SEND. 
Once all new kinds are added, change so that BASE allocate only @@ -178,6 +179,11 @@ brw_inst_kind_for_opcode(enum opcode opcode) case SHADER_OPCODE_SAMPLEINFO_LOGICAL: return BRW_KIND_TEX; + case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: + case SHADER_OPCODE_MEMORY_STORE_LOGICAL: + case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: + return BRW_KIND_MEM; + default: return BRW_KIND_BASE; } @@ -221,14 +227,6 @@ brw_inst::is_control_source(unsigned arg) const case SHADER_OPCODE_SEND_GATHER: return arg < SEND_SRC_PAYLOAD1; - case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: - case SHADER_OPCODE_MEMORY_STORE_LOGICAL: - case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: - return arg != MEMORY_LOGICAL_BINDING && - arg != MEMORY_LOGICAL_ADDRESS && - arg != MEMORY_LOGICAL_DATA0 && - arg != MEMORY_LOGICAL_DATA1; - case SHADER_OPCODE_QUAD_SWAP: case SHADER_OPCODE_INCLUSIVE_SCAN: case SHADER_OPCODE_EXCLUSIVE_SCAN: @@ -492,13 +490,15 @@ brw_inst::components_read(unsigned i) const if (i == MEMORY_LOGICAL_DATA1) return 0; FALLTHROUGH; - case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: + case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: { + const brw_mem_inst *mem = as_mem(); if (i == MEMORY_LOGICAL_DATA0 || i == MEMORY_LOGICAL_DATA1) - return src[MEMORY_LOGICAL_COMPONENTS].ud; + return mem->components; else if (i == MEMORY_LOGICAL_ADDRESS) - return src[MEMORY_LOGICAL_COORD_COMPONENTS].ud; + return mem->coord_components; else return 1; + } case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: return (i == 0 ? 
2 : 1); @@ -946,8 +946,7 @@ brw_inst::is_volatile() const case SHADER_OPCODE_LOAD_REG: return true; case SHADER_OPCODE_MEMORY_STORE_LOGICAL: - assert(sources > MEMORY_LOGICAL_FLAGS); - return src[MEMORY_LOGICAL_FLAGS].ud & MEMORY_FLAG_VOLATILE_ACCESS; + return as_mem()->flags & MEMORY_FLAG_VOLATILE_ACCESS; case SHADER_OPCODE_SEND: case SHADER_OPCODE_SEND_GATHER: return as_send()->is_volatile; diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h index 4d6655de7f7..071b20a6f21 100644 --- a/src/intel/compiler/brw_inst.h +++ b/src/intel/compiler/brw_inst.h @@ -43,6 +43,7 @@ enum ENUM_PACKED brw_inst_kind { BRW_KIND_BASE, BRW_KIND_SEND, BRW_KIND_TEX, + BRW_KIND_MEM, }; brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode); @@ -70,6 +71,7 @@ struct brw_inst : brw_exec_node { KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND); KIND_HELPERS(as_tex, brw_tex_inst, BRW_KIND_TEX); + KIND_HELPERS(as_mem, brw_mem_inst, BRW_KIND_MEM); #undef KIND_HELPERS @@ -278,6 +280,22 @@ struct brw_tex_inst : brw_inst { bool residency; }; +struct brw_mem_inst : brw_inst { + enum lsc_opcode lsc_op; + enum memory_logical_mode mode; + enum lsc_addr_surface_type binding_type; + enum lsc_data_size data_size; + + uint8_t coord_components; + uint8_t components; + uint8_t flags; + + /** Required alignment of address in bytes; 0 for natural alignment */ + uint32_t alignment; + + int32_t address_offset; +}; + /** * Make the execution of \p inst dependent on the evaluation of a possibly * inverted predicate. 
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 163977bd9b4..5981ddb6981 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1478,48 +1478,35 @@ lsc_addr_size_for_type(enum brw_reg_type type) } static void -lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst) +lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem) { const intel_device_info *devinfo = bld.shader->devinfo; assert(devinfo->has_lsc); - assert(inst->src[MEMORY_LOGICAL_OPCODE].file == IMM); - assert(inst->src[MEMORY_LOGICAL_MODE].file == IMM); - assert(inst->src[MEMORY_LOGICAL_BINDING_TYPE].file == IMM); - assert(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].file == IMM); - assert(inst->src[MEMORY_LOGICAL_DATA_SIZE].file == IMM); - assert(inst->src[MEMORY_LOGICAL_FLAGS].file == IMM); - /* Get the logical send arguments. */ - const enum lsc_opcode op = (lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud; - const enum memory_logical_mode mode = - (enum memory_logical_mode) inst->src[MEMORY_LOGICAL_MODE].ud; - const enum lsc_addr_surface_type binding_type = - (enum lsc_addr_surface_type) inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud; - const brw_reg binding = inst->src[MEMORY_LOGICAL_BINDING]; - const brw_reg addr = inst->src[MEMORY_LOGICAL_ADDRESS]; - const unsigned coord_components = - inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].ud; - enum lsc_data_size data_size = - (enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud; - const unsigned components = inst->src[MEMORY_LOGICAL_COMPONENTS].ud; - const enum memory_flags flags = - (enum memory_flags) inst->src[MEMORY_LOGICAL_FLAGS].ud; - const bool transpose = flags & MEMORY_FLAG_TRANSPOSE; - const bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS; - const bool volatile_access = flags & MEMORY_FLAG_VOLATILE_ACCESS; - const bool coherent_access = flags & MEMORY_FLAG_COHERENT_ACCESS; - const 
brw_reg data0 = inst->src[MEMORY_LOGICAL_DATA0]; - const brw_reg data1 = inst->src[MEMORY_LOGICAL_DATA1]; - const bool has_side_effects = inst->has_side_effects(); + const brw_reg binding = mem->src[MEMORY_LOGICAL_BINDING]; + const brw_reg addr = mem->src[MEMORY_LOGICAL_ADDRESS]; + const brw_reg data0 = mem->src[MEMORY_LOGICAL_DATA0]; + const brw_reg data1 = mem->src[MEMORY_LOGICAL_DATA1]; + + const enum lsc_opcode op = mem->lsc_op; + const enum memory_logical_mode mode = mem->mode; + const enum lsc_addr_surface_type binding_type = mem->binding_type; + const unsigned coord_components = mem->coord_components; + enum lsc_data_size data_size = mem->data_size; + const unsigned components = mem->components; + const bool transpose = mem->flags & MEMORY_FLAG_TRANSPOSE; + const bool include_helpers = mem->flags & MEMORY_FLAG_INCLUDE_HELPERS; + const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS; + const bool coherent_access = mem->flags & MEMORY_FLAG_COHERENT_ACCESS; + const bool has_side_effects = mem->has_side_effects(); const uint32_t data_size_B = lsc_data_size_bytes(data_size); const enum brw_reg_type data_type = brw_type_with_size(data0.type, data_size_B * 8); const enum lsc_addr_size addr_size = lsc_addr_size_for_type(addr.type); - assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].file == IMM); - const int32_t base_offset = inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d; + const int32_t base_offset = mem->address_offset; /** * TGM messages cannot have a base offset @@ -1530,9 +1517,9 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst) brw_reg payload = addr; if (addr.file != VGRF || !addr.is_contiguous()) { - if (inst->force_writemask_all) { + if (mem->force_writemask_all) { const brw_builder dbld = - inst->exec_size == 1 ? + mem->exec_size == 1 ? 
bld.scalar_group() : bld.group(bld.shader->dispatch_width, 0); payload = dbld.move_to_vgrf(addr, coord_components); @@ -1565,7 +1552,7 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst) payload2 = bld.vgrf(data0.type, size); bld.LOAD_PAYLOAD(payload2, data, size, 0); - ex_mlen = (size * brw_type_size_bytes(data_type) * inst->exec_size) / REG_SIZE; + ex_mlen = (size * brw_type_size_bytes(data_type) * mem->exec_size) / REG_SIZE; } } @@ -1616,13 +1603,13 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst) */ if (bld.shader->stage == MESA_SHADER_FRAGMENT && !transpose) { if (include_helpers) - emit_predicate_on_vector_mask(bld, inst); + emit_predicate_on_vector_mask(bld, mem); else if (has_side_effects && mode != MEMORY_MODE_SCRATCH) - brw_emit_predicate_on_sample_mask(bld, inst); + brw_emit_predicate_on_sample_mask(bld, mem); } - brw_send_inst *send = brw_transform_inst_to_send(bld, inst); - inst = NULL; + brw_send_inst *send = brw_transform_inst_to_send(bld, mem); + mem = NULL; switch (mode) { case MEMORY_MODE_UNTYPED: @@ -1704,42 +1691,29 @@ emit_a64_oword_block_header(const brw_builder &bld, const brw_reg &addr) } static void -lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst) +lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem) { const intel_device_info *devinfo = bld.shader->devinfo; const brw_compiler *compiler = bld.shader->compiler; - assert(inst->src[MEMORY_LOGICAL_OPCODE].file == IMM); - assert(inst->src[MEMORY_LOGICAL_MODE].file == IMM); - assert(inst->src[MEMORY_LOGICAL_BINDING_TYPE].file == IMM); - assert(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].file == IMM); - assert(inst->src[MEMORY_LOGICAL_DATA_SIZE].file == IMM); - assert(inst->src[MEMORY_LOGICAL_FLAGS].file == IMM); - assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].file == IMM); - /* Get the logical send arguments. 
*/ - const enum lsc_opcode op = (lsc_opcode)inst->src[MEMORY_LOGICAL_OPCODE].ud; - const enum memory_logical_mode mode = - (enum memory_logical_mode) inst->src[MEMORY_LOGICAL_MODE].ud; - enum lsc_addr_surface_type binding_type = - (enum lsc_addr_surface_type) inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud; - brw_reg binding = inst->src[MEMORY_LOGICAL_BINDING]; - const brw_reg addr = inst->src[MEMORY_LOGICAL_ADDRESS]; - const unsigned coord_components = - inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].ud; - const unsigned alignment = inst->src[MEMORY_LOGICAL_ALIGNMENT].ud; - const unsigned components = inst->src[MEMORY_LOGICAL_COMPONENTS].ud; - const enum memory_flags flags = - (enum memory_flags) inst->src[MEMORY_LOGICAL_FLAGS].ud; - const bool block = flags & MEMORY_FLAG_TRANSPOSE; - const bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS; - const bool volatile_access = flags & MEMORY_FLAG_VOLATILE_ACCESS; - const brw_reg data0 = inst->src[MEMORY_LOGICAL_DATA0]; - const brw_reg data1 = inst->src[MEMORY_LOGICAL_DATA1]; - const bool has_side_effects = inst->has_side_effects(); - const bool has_dest = inst->dst.file != BAD_FILE && !inst->dst.is_null(); - assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].file == IMM && - inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d == 0); + brw_reg binding = mem->src[MEMORY_LOGICAL_BINDING]; + const brw_reg addr = mem->src[MEMORY_LOGICAL_ADDRESS]; + const brw_reg data0 = mem->src[MEMORY_LOGICAL_DATA0]; + const brw_reg data1 = mem->src[MEMORY_LOGICAL_DATA1]; + + const enum lsc_opcode op = mem->lsc_op; + const enum memory_logical_mode mode = mem->mode; + enum lsc_addr_surface_type binding_type = mem->binding_type; + const unsigned coord_components = mem->coord_components; + const unsigned alignment = mem->alignment; + const unsigned components = mem->components; + const bool block = mem->flags & MEMORY_FLAG_TRANSPOSE; + const bool include_helpers = mem->flags & MEMORY_FLAG_INCLUDE_HELPERS; + const bool volatile_access = mem->flags & 
MEMORY_FLAG_VOLATILE_ACCESS; + const bool has_side_effects = mem->has_side_effects(); + const bool has_dest = mem->dst.file != BAD_FILE && !mem->dst.is_null(); + assert(mem->address_offset == 0); /* Don't predicate scratch writes on the sample mask. Otherwise, * FS helper invocations would load undefined values from scratch memory. @@ -1749,8 +1723,7 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst) */ bool allow_sample_mask = has_side_effects && mode != MEMORY_MODE_SCRATCH; - const enum lsc_data_size data_size = - (enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud; + const enum lsc_data_size data_size = mem->data_size; /* unpadded data size */ const uint32_t data_bit_size = @@ -1801,10 +1774,10 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst) */ if (bld.shader->stage == MESA_SHADER_FRAGMENT) { if (include_helpers) - emit_predicate_on_vector_mask(bld, inst); + emit_predicate_on_vector_mask(bld, mem); else if (allow_sample_mask && (header.file == BAD_FILE || !surface_access)) - brw_emit_predicate_on_sample_mask(bld, inst); + brw_emit_predicate_on_sample_mask(bld, mem); } brw_reg payload, payload2; @@ -1870,12 +1843,12 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst) sfid = BRW_SFID_HDC1; if (lsc_opcode_is_atomic(op)) { - desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group, + desc = brw_dp_typed_atomic_desc(devinfo, mem->exec_size, mem->group, lsc_op_to_legacy_atomic(op), has_dest); } else { - desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size, - inst->group, components, !has_dest); + desc = brw_dp_typed_surface_rw_desc(devinfo, mem->exec_size, + mem->group, components, !has_dest); } } else if (mode == MEMORY_MODE_CONSTANT) { assert(block); /* non-block loads not yet handled */ @@ -1891,11 +1864,11 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst) if (lsc_opcode_is_atomic(op)) { unsigned aop = lsc_op_to_legacy_atomic(op); if 
(lsc_opcode_is_atomic_float(op)) { - desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size, + desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, mem->exec_size, data_bit_size, aop, has_dest); } else { - desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, + desc = brw_dp_a64_untyped_atomic_desc(devinfo, mem->exec_size, data_bit_size, aop, has_dest); } @@ -1903,10 +1876,10 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst) desc = brw_dp_a64_oword_block_rw_desc(devinfo, oword_aligned, components, !has_dest); } else if (byte_scattered) { - desc = brw_dp_a64_byte_scattered_rw_desc(devinfo, inst->exec_size, + desc = brw_dp_a64_byte_scattered_rw_desc(devinfo, mem->exec_size, data_bit_size, !has_dest); } else { - desc = brw_dp_a64_untyped_surface_rw_desc(devinfo, inst->exec_size, + desc = brw_dp_a64_untyped_surface_rw_desc(devinfo, mem->exec_size, components, !has_dest); } } else { @@ -1917,31 +1890,31 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst) if (lsc_opcode_is_atomic(op)) { unsigned aop = lsc_op_to_legacy_atomic(op); if (lsc_opcode_is_atomic_float(op)) { - desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size, + desc = brw_dp_untyped_atomic_float_desc(devinfo, mem->exec_size, aop, has_dest); } else { - desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size, + desc = brw_dp_untyped_atomic_desc(devinfo, mem->exec_size, aop, has_dest); } } else if (block) { desc = brw_dp_oword_block_rw_desc(devinfo, oword_aligned, components, !has_dest); } else if (byte_scattered) { - desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size, + desc = brw_dp_byte_scattered_rw_desc(devinfo, mem->exec_size, data_bit_size, !has_dest); } else if (dword_scattered) { - desc = brw_dp_dword_scattered_rw_desc(devinfo, inst->exec_size, + desc = brw_dp_dword_scattered_rw_desc(devinfo, mem->exec_size, !has_dest); } else { - desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size, + desc = 
brw_dp_untyped_surface_rw_desc(devinfo, mem->exec_size, components, !has_dest); } } assert(sfid); - brw_send_inst *send = brw_transform_inst_to_send(bld, inst); - inst = NULL; + brw_send_inst *send = brw_transform_inst_to_send(bld, mem); + mem = NULL; send->sfid = sfid; send->mlen = mlen; @@ -2682,14 +2655,15 @@ brw_lower_logical_sends(brw_shader &s) case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: case SHADER_OPCODE_MEMORY_STORE_LOGICAL: - case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: + case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: { + brw_mem_inst *mem = inst->as_mem(); if (devinfo->ver >= 20 || - (devinfo->has_lsc && - inst->src[MEMORY_LOGICAL_MODE].ud != MEMORY_MODE_TYPED)) - lower_lsc_memory_logical_send(ibld, inst); + (devinfo->has_lsc && mem->mode != MEMORY_MODE_TYPED)) + lower_lsc_memory_logical_send(ibld, mem); else - lower_hdc_memory_logical_send(ibld, inst); + lower_hdc_memory_logical_send(ibld, mem); break; + } case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: if (devinfo->has_lsc && !s.compiler->indirect_ubos_use_sampler) diff --git a/src/intel/compiler/brw_lower_simd_width.cpp b/src/intel/compiler/brw_lower_simd_width.cpp index c9e0594ebd0..63ae2b71dcd 100644 --- a/src/intel/compiler/brw_lower_simd_width.cpp +++ b/src/intel/compiler/brw_lower_simd_width.cpp @@ -376,21 +376,23 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst) case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: case SHADER_OPCODE_MEMORY_STORE_LOGICAL: - case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: - if (devinfo->ver >= 20) - return inst->exec_size; + case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: { + const brw_mem_inst *mem = inst->as_mem(); - if (inst->src[MEMORY_LOGICAL_MODE].ud == MEMORY_MODE_TYPED) + if (devinfo->ver >= 20) + return mem->exec_size; + + if (mem->mode == MEMORY_MODE_TYPED) return 8; /* HDC A64 atomics are limited to SIMD8 */ if (!devinfo->has_lsc && - inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT - && lsc_opcode_is_atomic((enum lsc_opcode) - 
inst->src[MEMORY_LOGICAL_OPCODE].ud)) + mem->binding_type == LSC_ADDR_SURFTYPE_FLAT && + lsc_opcode_is_atomic(mem->lsc_op)) return 8; - return MIN2(16, inst->exec_size); + return MIN2(16, mem->exec_size); + } /* On gfx12 parameters are fixed to 16-bit values and therefore they all * always fit regardless of the execution size. diff --git a/src/intel/compiler/brw_opt_cse.cpp b/src/intel/compiler/brw_opt_cse.cpp index 0947b8e20b6..b187b32a45e 100644 --- a/src/intel/compiler/brw_opt_cse.cpp +++ b/src/intel/compiler/brw_opt_cse.cpp @@ -131,7 +131,7 @@ is_expression(const brw_shader *v, const brw_inst *const inst) case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: return true; case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: - return inst->src[MEMORY_LOGICAL_MODE].ud == MEMORY_MODE_CONSTANT; + return inst->as_mem()->mode == MEMORY_MODE_CONSTANT; case SHADER_OPCODE_LOAD_PAYLOAD: return !is_coalescing_payload(*v, inst); case SHADER_OPCODE_SEND: @@ -259,6 +259,20 @@ tex_inst_match(brw_tex_inst *a, brw_tex_inst *b) a->residency == b->residency; } +static bool +mem_inst_match(brw_mem_inst *a, brw_mem_inst *b) +{ + return a->lsc_op == b->lsc_op && + a->mode == b->mode && + a->binding_type == b->binding_type && + a->data_size == b->data_size && + a->coord_components == b->coord_components && + a->components == b->components && + a->flags == b->flags && + a->alignment == b->alignment && + a->address_offset == b->address_offset; +} + static bool instructions_match(brw_inst *a, brw_inst *b, bool *negate) { @@ -267,6 +281,7 @@ instructions_match(brw_inst *a, brw_inst *b, bool *negate) /* `kind` is derived from opcode, so skipped. 
*/ (a->kind != BRW_KIND_SEND || send_inst_match(a->as_send(), b->as_send())) && (a->kind != BRW_KIND_TEX || tex_inst_match(a->as_tex(), b->as_tex())) && + (a->kind != BRW_KIND_MEM || mem_inst_match(a->as_mem(), b->as_mem())) && a->exec_size == b->exec_size && a->group == b->group && a->predicate == b->predicate && @@ -362,6 +377,26 @@ hash_inst(const void *v) break; } + case BRW_KIND_MEM: { + const brw_mem_inst *mem = inst->as_mem(); + const uint8_t mem_u8data[] = { + mem->lsc_op, + mem->mode, + mem->binding_type, + mem->data_size, + mem->coord_components, + mem->components, + mem->flags, + }; + const uint32_t mem_u32data[] = { + (uint32_t)mem->address_offset, + mem->alignment, + }; + hash = HASH(hash, mem_u8data); + hash = HASH(hash, mem_u32data); + break; + } + case BRW_KIND_BASE: /* Nothing else to do. */ break; diff --git a/src/intel/compiler/brw_print.cpp b/src/intel/compiler/brw_print.cpp index f7a56deb080..7dec8aaa213 100644 --- a/src/intel/compiler/brw_print.cpp +++ b/src/intel/compiler/brw_print.cpp @@ -321,61 +321,17 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op) static bool print_memory_logical_source(FILE *file, const brw_inst *inst, unsigned i) { - if (inst->is_control_source(i)) { - assert(inst->src[i].file == IMM && - (inst->src[i].type == BRW_TYPE_UD || - inst->src[i].type == BRW_TYPE_D)); - assert(!inst->src[i].negate); - assert(!inst->src[i].abs); - } - switch (i) { - case MEMORY_LOGICAL_OPCODE: - fprintf(file, " %s", brw_lsc_op_to_string(inst->src[i].ud)); - return true; - case MEMORY_LOGICAL_MODE: { - static const char *modes[] = { - [MEMORY_MODE_TYPED] = "typed", - [MEMORY_MODE_UNTYPED] = "untyped", - [MEMORY_MODE_SHARED_LOCAL] = "shared", - [MEMORY_MODE_SCRATCH] = "scratch", - [MEMORY_MODE_CONSTANT] = "const", - }; - assert(inst->src[i].ud < ARRAY_SIZE(modes)); - fprintf(file, " %s", modes[inst->src[i].ud]); - return true; - } - case MEMORY_LOGICAL_BINDING_TYPE: - fprintf(file, " %s", 
brw_lsc_addr_surftype_to_string(inst->src[i].ud)); - if (inst->src[i].ud != LSC_ADDR_SURFTYPE_FLAT) + case MEMORY_LOGICAL_BINDING: { + lsc_addr_surface_type binding_type = inst->as_mem()->binding_type; + fprintf(file, " %s", brw_lsc_addr_surftype_to_string(binding_type)); + if (binding_type != LSC_ADDR_SURFTYPE_FLAT) fprintf(file, ":"); - return true; - case MEMORY_LOGICAL_BINDING: return inst->src[i].file == BAD_FILE; + } case MEMORY_LOGICAL_ADDRESS: fprintf(file, " addr: "); return false; - case MEMORY_LOGICAL_ADDRESS_OFFSET: - fprintf(file, " offset: "); - return false; - case MEMORY_LOGICAL_COORD_COMPONENTS: - fprintf(file, " coord_comps:"); - return false; - case MEMORY_LOGICAL_ALIGNMENT: - fprintf(file, " align:"); - return false; - case MEMORY_LOGICAL_DATA_SIZE: - fprintf(file, " %s", brw_lsc_data_size_to_string(inst->src[i].ud)); - return true; - case MEMORY_LOGICAL_COMPONENTS: - fprintf(file, " comps:"); - return false; - case MEMORY_LOGICAL_FLAGS: - if (inst->src[i].ud & MEMORY_FLAG_TRANSPOSE) - fprintf(file, " transpose"); - if (inst->src[i].ud & MEMORY_FLAG_INCLUDE_HELPERS) - fprintf(file, " helpers"); - return true; case MEMORY_LOGICAL_DATA0: fprintf(file, " data0: "); return false; @@ -499,10 +455,37 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type)); } + const brw_mem_inst *mem = inst->as_mem(); + if (mem) { + fprintf(file, " %s", brw_lsc_op_to_string(mem->lsc_op)); + + static const char *modes[] = { + [MEMORY_MODE_TYPED] = "typed", + [MEMORY_MODE_UNTYPED] = "untyped", + [MEMORY_MODE_SHARED_LOCAL] = "shared", + [MEMORY_MODE_SCRATCH] = "scratch", + [MEMORY_MODE_CONSTANT] = "const", + }; + assert(mem->mode < ARRAY_SIZE(modes)); + fprintf(file, " %s", modes[mem->mode]); + fprintf(file, " offset: %dd", mem->address_offset); + fprintf(file, " coord_comps: %uu", mem->coord_components); + fprintf(file, " %s", brw_lsc_data_size_to_string(mem->data_size)); + 
fprintf(file, " comps: %uu", mem->components); + fprintf(file, " align: %uu", mem->alignment); + + if (mem->flags & MEMORY_FLAG_TRANSPOSE) + fprintf(file, " transpose"); + if (mem->flags & MEMORY_FLAG_INCLUDE_HELPERS) + fprintf(file, " helpers"); + if (mem->flags & MEMORY_FLAG_VOLATILE_ACCESS) + fprintf(file, " volatile"); + if (mem->flags & MEMORY_FLAG_COHERENT_ACCESS) + fprintf(file, " coherent"); + } + for (int i = 0; i < inst->sources; i++) { - if (inst->opcode == SHADER_OPCODE_MEMORY_LOAD_LOGICAL || - inst->opcode == SHADER_OPCODE_MEMORY_STORE_LOGICAL || - inst->opcode == SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL) { + if (mem) { if (print_memory_logical_source(file, inst, i)) continue; } else { diff --git a/src/intel/compiler/brw_validate.cpp b/src/intel/compiler/brw_validate.cpp index 4c8a22e09ea..8add43563b1 100644 --- a/src/intel/compiler/brw_validate.cpp +++ b/src/intel/compiler/brw_validate.cpp @@ -90,36 +90,18 @@ is_ud_imm(const brw_reg &reg) return reg.file == IMM && reg.type == BRW_TYPE_UD; } -static inline bool -is_d_imm(const brw_reg &reg) -{ - return reg.file == IMM && reg.type == BRW_TYPE_D; -} - static void -validate_memory_logical(const brw_shader &s, const brw_inst *inst) +validate_memory_logical(const brw_shader &s, const brw_mem_inst *inst) { const intel_device_info *devinfo = s.devinfo; - VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_OPCODE])); - VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_MODE])); - VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_BINDING_TYPE])); - VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS])); - VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_ALIGNMENT])); - VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_DATA_SIZE])); - VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_COMPONENTS])); - VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_FLAGS])); - VAL_ASSERT(is_d_imm(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET])); - - enum lsc_opcode op = (enum lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud; - enum memory_flags flags = 
(memory_flags)inst->src[MEMORY_LOGICAL_FLAGS].ud; + enum lsc_opcode op = inst->lsc_op; + enum memory_flags flags = (memory_flags)inst->flags; bool transpose = flags & MEMORY_FLAG_TRANSPOSE; bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS; - enum memory_logical_mode mode = - (memory_logical_mode)inst->src[MEMORY_LOGICAL_MODE].ud; + enum memory_logical_mode mode = inst->mode; - enum lsc_data_size data_size = - (enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud; + enum lsc_data_size data_size = inst->data_size; unsigned data_size_B = lsc_data_size_bytes(data_size); if (!devinfo->has_lsc) { @@ -131,18 +113,18 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst) if (transpose) { const unsigned min_alignment = mode == MEMORY_MODE_SHARED_LOCAL ? 16 : 4; - VAL_ASSERT_GE(inst->src[MEMORY_LOGICAL_ALIGNMENT].ud, min_alignment); + VAL_ASSERT_GE(inst->alignment, min_alignment); } } VAL_ASSERT(!transpose || !include_helpers); VAL_ASSERT(!transpose || lsc_opcode_has_transpose(op)); - if (inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT) + if (inst->binding_type == LSC_ADDR_SURFTYPE_FLAT) VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_BINDING].file, BAD_FILE); if (inst->src[MEMORY_LOGICAL_DATA1].file != BAD_FILE) { - VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud, + VAL_ASSERT_EQ(inst->components, inst->components_read(MEMORY_LOGICAL_DATA1)); VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_DATA0].type, @@ -150,7 +132,7 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst) } if (inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE) { - VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud, + VAL_ASSERT_EQ(inst->components, inst->components_read(MEMORY_LOGICAL_DATA0)); VAL_ASSERT_EQ(brw_type_size_bytes(inst->src[MEMORY_LOGICAL_DATA0].type), @@ -162,10 +144,10 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst) /** TGM messages cannot have a base offset */ if (mode == MEMORY_MODE_TYPED) - 
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d, 0); + VAL_ASSERT_EQ(inst->as_mem()->address_offset, 0); /* Offset must be DWord aligned */ - VAL_ASSERT_EQ((inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d % 4), 0); + VAL_ASSERT_EQ((inst->as_mem()->address_offset % 4), 0); switch (inst->opcode) { case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: @@ -185,7 +167,7 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst) (lsc_op_num_data_values(op) < 1)); VAL_ASSERT_EQ((inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE), (lsc_op_num_data_values(op) < 2)); - VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud, 1); + VAL_ASSERT_EQ(inst->components, 1); VAL_ASSERT(!include_helpers); break; default: @@ -336,7 +318,7 @@ brw_validate(const brw_shader &s) case SHADER_OPCODE_MEMORY_LOAD_LOGICAL: case SHADER_OPCODE_MEMORY_STORE_LOGICAL: case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: - validate_memory_logical(s, inst); + validate_memory_logical(s, inst->as_mem()); break; case SHADER_OPCODE_MEMORY_FENCE: