brw: Add brw_mem_inst

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36730>
This commit is contained in:
Caio Oliveira 2025-08-21 16:20:49 -07:00 committed by Marge Bot
parent f0f1e63f99
commit 09a26526cc
9 changed files with 280 additions and 305 deletions

View file

@ -671,7 +671,7 @@ enum get_buffer_size_srcs {
GET_BUFFER_SIZE_SRCS
};
enum memory_logical_mode {
enum ENUM_PACKED memory_logical_mode {
MEMORY_MODE_TYPED,
MEMORY_MODE_UNTYPED,
MEMORY_MODE_SHARED_LOCAL,
@ -680,17 +680,8 @@ enum memory_logical_mode {
};
enum memory_logical_srcs {
/** enum lsc_opcode (as UD immediate) */
MEMORY_LOGICAL_OPCODE,
/** enum memory_logical_mode (as UD immediate) */
MEMORY_LOGICAL_MODE,
/** enum lsc_addr_surface_type (as UD immediate) */
MEMORY_LOGICAL_BINDING_TYPE,
/**
* Where to find the surface state. Depends on BINDING_TYPE above:
* Where to find the surface state. Depends on brw_mem_inst::binding_type:
*
* - SS: pointer to surface state (relative to surface base address)
* - BSS: pointer to surface state (relative to bindless surface base)
@ -702,24 +693,6 @@ enum memory_logical_srcs {
/** Coordinate/address/offset for where to access memory */
MEMORY_LOGICAL_ADDRESS,
/** Xe2+: offset for where to access memory (as UD immediate) */
MEMORY_LOGICAL_ADDRESS_OFFSET,
/** Dimensionality of the "address" source (as UD immediate) */
MEMORY_LOGICAL_COORD_COMPONENTS,
/** Required alignment of address in bytes; 0 for natural alignment */
MEMORY_LOGICAL_ALIGNMENT,
/** Bit-size in the form of enum lsc_data_size (as UD immediate) */
MEMORY_LOGICAL_DATA_SIZE,
/** Number of vector components (as UD immediate) */
MEMORY_LOGICAL_COMPONENTS,
/** memory_flags bitfield (as UD immediate) */
MEMORY_LOGICAL_FLAGS,
/** Data to write for stores or the first operand for atomics */
MEMORY_LOGICAL_DATA0,
@ -1583,7 +1556,7 @@ enum ENUM_PACKED brw_rnd_mode {
* one or two messages with different bits to control things like address
* size, how much data is read/written, etc.
*/
enum lsc_opcode {
enum ENUM_PACKED lsc_opcode {
LSC_OP_LOAD = 0,
LSC_OP_LOAD_CMASK = 2,
LSC_OP_STORE = 4,

View file

@ -33,6 +33,8 @@
#include "util/bitscan.h"
#include "compiler/glsl_types.h"
#include <optional>
struct brw_bind_info {
bool valid;
bool bindless;
@ -4617,42 +4619,42 @@ can_use_instruction_offset(enum lsc_addr_surface_type binding_type, int32_t offs
return offset >= u_intN_min(max_bits) && offset <= u_intN_max(max_bits);
}
static void
set_memory_address(nir_to_brw_state &ntb,
const brw_builder &bld,
nir_intrinsic_instr *instr,
brw_reg *srcs)
static brw_reg
memory_address(nir_to_brw_state &ntb,
const brw_builder &bld,
nir_intrinsic_instr *instr,
enum lsc_addr_surface_type binding_type,
int32_t *address_offset)
{
const intel_device_info *devinfo = ntb.devinfo;
const nir_src *nir_src_offset = nir_get_io_offset_src(instr);
const brw_reg src_offset = get_nir_src_imm(ntb, *nir_src_offset);
const enum lsc_addr_surface_type binding_type =
(enum lsc_addr_surface_type) srcs[MEMORY_LOGICAL_BINDING_TYPE].ud;
const brw_builder ubld = src_offset.is_scalar ? bld.scalar_group() : bld;
brw_reg address;
if (devinfo->ver < 20 ||
(!nir_intrinsic_has_base(instr) && !nir_src_is_const(*nir_src_offset))) {
srcs[MEMORY_LOGICAL_ADDRESS] =
address =
nir_intrinsic_has_base(instr) ?
ubld.ADD(src_offset,
brw_imm_int(src_offset.type, nir_intrinsic_base(instr))) :
src_offset;
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0);
*address_offset = 0;
} else if (!nir_intrinsic_has_base(instr) && nir_src_is_const(*nir_src_offset)) {
const int32_t offset = nir_src_as_int(*nir_src_offset);
if (can_use_instruction_offset(binding_type, offset)) {
srcs[MEMORY_LOGICAL_ADDRESS] = brw_imm_ud(0);
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(offset);
address = brw_imm_ud(0);
*address_offset = offset;
} else {
srcs[MEMORY_LOGICAL_ADDRESS] = src_offset;
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0);
address = src_offset;
*address_offset = 0;
}
} else {
assert(nir_intrinsic_has_base(instr));
const int32_t offset = nir_intrinsic_base(instr);
assert(can_use_instruction_offset(binding_type, offset));
srcs[MEMORY_LOGICAL_ADDRESS] = src_offset;
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(offset);
address = src_offset;
*address_offset = offset;
}
/* If nir_src is_scalar, the MEMORY_LOGICAL_ADDRESS will be allocated at
@ -4661,7 +4663,9 @@ set_memory_address(nir_to_brw_state &ntb,
* properly also ensures that emit_uniformize (below) will handle the value
* as scalar_group() size instead of full dispatch width.
*/
srcs[MEMORY_LOGICAL_ADDRESS].is_scalar = src_offset.is_scalar;
address.is_scalar = src_offset.is_scalar;
return address;
}
static unsigned
@ -4757,22 +4761,20 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb,
cs_prog_data->uses_num_work_groups = true;
brw_reg srcs[MEMORY_LOGICAL_NUM_SRCS];
srcs[MEMORY_LOGICAL_OPCODE] = brw_imm_ud(LSC_OP_LOAD);
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_UNTYPED);
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BTI);
srcs[MEMORY_LOGICAL_BINDING] = brw_imm_ud(0);
srcs[MEMORY_LOGICAL_ADDRESS] = brw_imm_ud(0);
srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud(1);
srcs[MEMORY_LOGICAL_ALIGNMENT] = brw_imm_ud(4);
srcs[MEMORY_LOGICAL_DATA_SIZE] = brw_imm_ud(LSC_DATA_SIZE_D32);
srcs[MEMORY_LOGICAL_COMPONENTS] = brw_imm_ud(3);
srcs[MEMORY_LOGICAL_FLAGS] = brw_imm_ud(0);
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0);
brw_inst *inst =
brw_mem_inst *mem =
bld.emit(SHADER_OPCODE_MEMORY_LOAD_LOGICAL,
dest, srcs, MEMORY_LOGICAL_NUM_SRCS);
inst->size_written = 3 * s.dispatch_width * 4;
dest, srcs, MEMORY_LOGICAL_NUM_SRCS)->as_mem();
mem->size_written = 3 * s.dispatch_width * 4;
mem->lsc_op = LSC_OP_LOAD;
mem->mode = MEMORY_MODE_UNTYPED;
mem->binding_type = LSC_ADDR_SURFTYPE_BTI;
mem->data_size = LSC_DATA_SIZE_D32;
mem->coord_components = 1;
mem->components = 3;
mem->alignment = 4;
break;
}
@ -7090,30 +7092,26 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
(nir_intrinsic_access(instr) & ACCESS_COHERENT);
const unsigned align =
nir_intrinsic_has_align(instr) ? nir_intrinsic_align(instr) : 0;
const unsigned logical_flags =
uint8_t flags =
(include_helpers ? MEMORY_FLAG_INCLUDE_HELPERS : 0) |
(volatile_access ? MEMORY_FLAG_VOLATILE_ACCESS : 0) |
(coherent_access ? MEMORY_FLAG_COHERENT_ACCESS : 0);
bool no_mask_handle = false;
int data_src = -1;
srcs[MEMORY_LOGICAL_OPCODE] = brw_imm_ud(op);
/* BINDING_TYPE, BINDING, and ADDRESS are handled in the switch */
srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud(1);
srcs[MEMORY_LOGICAL_ALIGNMENT] = brw_imm_ud(align);
/* DATA_SIZE and CHANNELS are handled below the switch */
srcs[MEMORY_LOGICAL_FLAGS] = brw_imm_ud(logical_flags);
/* DATA0 and DATA1 are handled below */
uint8_t coord_components = 1;
/* Set the default address offset to 0 */
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0);
int32_t address_offset = 0;
std::optional<memory_logical_mode> mode;
std::optional<lsc_addr_surface_type> binding_type;
switch (instr->intrinsic) {
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_store:
case nir_intrinsic_bindless_image_atomic:
case nir_intrinsic_bindless_image_atomic_swap:
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BSS);
binding_type = LSC_ADDR_SURFTYPE_BSS;
FALLTHROUGH;
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
@ -7127,22 +7125,22 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
* message. For TGM it will be 4 (U, V, R, SAMPLE_INDEX)."
*
*/
srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud(
coord_components =
(devinfo->ver >= 30 &&
nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_MS) ? 4 :
nir_image_intrinsic_coord_components(instr));
nir_image_intrinsic_coord_components(instr);
/* MSAA image atomic accesses not supported, must be lowered to UGM */
assert((instr->intrinsic != nir_intrinsic_bindless_image_atomic &&
instr->intrinsic != nir_intrinsic_bindless_image_atomic_swap) ||
nir_intrinsic_image_dim(instr) != GLSL_SAMPLER_DIM_MS);
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_TYPED);
mode = MEMORY_MODE_TYPED;
srcs[MEMORY_LOGICAL_BINDING] =
get_nir_image_intrinsic_image(ntb, bld, instr);
if (srcs[MEMORY_LOGICAL_BINDING_TYPE].file == BAD_FILE)
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BTI);
if (!binding_type.has_value())
binding_type = LSC_ADDR_SURFTYPE_BTI;
srcs[MEMORY_LOGICAL_ADDRESS] = get_nir_src(ntb, instr->src[1], 0);
@ -7150,7 +7148,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
break;
case nir_intrinsic_load_ubo_uniform_block_intel:
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_CONSTANT);
mode = MEMORY_MODE_CONSTANT;
FALLTHROUGH;
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_ssbo_intel:
@ -7161,14 +7159,14 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
case nir_intrinsic_load_ssbo_block_intel:
case nir_intrinsic_store_ssbo_block_intel:
case nir_intrinsic_load_ssbo_uniform_block_intel:
if (srcs[MEMORY_LOGICAL_MODE].file == BAD_FILE)
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_UNTYPED);
srcs[MEMORY_LOGICAL_BINDING_TYPE] =
brw_imm_ud(get_nir_src_bindless(ntb, instr->src[is_store ? 1 : 0]) ?
LSC_ADDR_SURFTYPE_BSS : LSC_ADDR_SURFTYPE_BTI);
if (!mode.has_value())
mode = MEMORY_MODE_UNTYPED;
binding_type = get_nir_src_bindless(ntb, instr->src[is_store ? 1 : 0]) ?
LSC_ADDR_SURFTYPE_BSS : LSC_ADDR_SURFTYPE_BTI;
srcs[MEMORY_LOGICAL_BINDING] =
get_nir_buffer_intrinsic_index(ntb, bld, instr, &no_mask_handle);
set_memory_address(ntb, bld, instr, srcs);
srcs[MEMORY_LOGICAL_ADDRESS] =
memory_address(ntb, bld, instr, *binding_type, &address_offset);
data_src = is_atomic ? 2 : 0;
break;
case nir_intrinsic_load_shared:
@ -7178,21 +7176,22 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
case nir_intrinsic_load_shared_block_intel:
case nir_intrinsic_store_shared_block_intel:
case nir_intrinsic_load_shared_uniform_block_intel: {
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_SHARED_LOCAL);
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_FLAT);
set_memory_address(ntb, bld, instr, srcs);
mode = MEMORY_MODE_SHARED_LOCAL;
binding_type = LSC_ADDR_SURFTYPE_FLAT;
srcs[MEMORY_LOGICAL_ADDRESS] =
memory_address(ntb, bld, instr, *binding_type, &address_offset);
data_src = is_atomic ? 1 : 0;
no_mask_handle = true;
break;
}
case nir_intrinsic_load_scratch:
case nir_intrinsic_store_scratch: {
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_SCRATCH);
mode = MEMORY_MODE_SCRATCH;
const nir_src &addr = instr->src[is_store ? 1 : 0];
if (devinfo->verx10 >= 125) {
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_SS);
binding_type = LSC_ADDR_SURFTYPE_SS;
const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0);
brw_reg bind = ubld.AND(retype(brw_vec1_grf(0, 5), BRW_TYPE_UD),
@ -7214,8 +7213,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
/* load_scratch / store_scratch cannot be is_scalar yet. */
assert(xbld.dispatch_width() == bld.dispatch_width());
srcs[MEMORY_LOGICAL_BINDING_TYPE] =
brw_imm_ud(LSC_ADDR_SURFTYPE_FLAT);
binding_type = LSC_ADDR_SURFTYPE_FLAT;
srcs[MEMORY_LOGICAL_ADDRESS] =
swizzle_nir_scratch_addr(ntb, bld, addr, dword_aligned);
}
@ -7237,9 +7235,10 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
case nir_intrinsic_global_atomic_swap:
case nir_intrinsic_load_global_block_intel:
case nir_intrinsic_store_global_block_intel:
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_UNTYPED);
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_FLAT);
set_memory_address(ntb, bld, instr, srcs);
mode = MEMORY_MODE_UNTYPED;
binding_type = LSC_ADDR_SURFTYPE_FLAT;
srcs[MEMORY_LOGICAL_ADDRESS] =
memory_address(ntb, bld, instr, *binding_type, &address_offset);
data_src = is_atomic ? 1 : 0;
no_mask_handle = srcs[MEMORY_LOGICAL_ADDRESS].is_scalar;
break;
@ -7253,15 +7252,11 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
if (components == 0)
components = instr->num_components;
srcs[MEMORY_LOGICAL_COMPONENTS] = brw_imm_ud(components);
const unsigned nir_bit_size =
is_store ? instr->src[data_src].ssa->bit_size : instr->def.bit_size;
enum lsc_data_size data_size = lsc_bits_to_data_size(nir_bit_size);
const enum lsc_data_size data_size = lsc_bits_to_data_size(nir_bit_size);
uint32_t data_bit_size = lsc_data_size_bytes(data_size) * 8;
srcs[MEMORY_LOGICAL_DATA_SIZE] = brw_imm_ud(data_size);
const brw_reg_type data_type =
brw_type_with_size(BRW_TYPE_UD, data_bit_size);
const brw_reg_type nir_data_type =
@ -7310,11 +7305,20 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
instr->intrinsic == nir_intrinsic_store_shared_block_intel ||
instr->intrinsic == nir_intrinsic_store_ssbo_block_intel;
brw_inst *inst;
brw_mem_inst *mem;
if (!block) {
inst = xbld.emit(opcode, dest, srcs, MEMORY_LOGICAL_NUM_SRCS);
inst->size_written *= components;
mem = xbld.emit(opcode, dest, srcs, MEMORY_LOGICAL_NUM_SRCS)->as_mem();
mem->size_written *= components;
mem->lsc_op = op;
mem->mode = *mode;
mem->binding_type = *binding_type;
mem->address_offset = address_offset;
mem->coord_components = coord_components;
mem->data_size = data_size;
mem->components = components;
mem->alignment = align;
mem->flags = flags;
if (dest.file != BAD_FILE && data_bit_size > nir_bit_size) {
/* Shrink e.g. D16U32 result back to D16 */
@ -7326,8 +7330,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
} else {
assert(nir_bit_size == 32);
srcs[MEMORY_LOGICAL_FLAGS] =
brw_imm_ud(MEMORY_FLAG_TRANSPOSE | srcs[MEMORY_LOGICAL_FLAGS].ud);
flags |= MEMORY_FLAG_TRANSPOSE;
srcs[MEMORY_LOGICAL_ADDRESS] =
bld.emit_uniformize(srcs[MEMORY_LOGICAL_ADDRESS]);
@ -7344,8 +7347,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
*/
if (srcs[MEMORY_LOGICAL_ADDRESS].file == IMM &&
align >= data_bit_size / 8 &&
(devinfo->has_lsc ||
srcs[MEMORY_LOGICAL_MODE].ud != MEMORY_MODE_SHARED_LOCAL)) {
(devinfo->has_lsc || mode != MEMORY_MODE_SHARED_LOCAL)) {
first_read_component = nir_def_first_component_read(&instr->def);
unsigned last_component = nir_def_last_component_read(&instr->def);
srcs[MEMORY_LOGICAL_ADDRESS].u64 +=
@ -7368,8 +7370,6 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
block_comps = choose_block_size_dwords(devinfo, total - done);
const unsigned block_bytes = block_comps * (nir_bit_size / 8);
srcs[MEMORY_LOGICAL_COMPONENTS] = brw_imm_ud(block_comps);
brw_reg dst_offset = is_store ? brw_reg() :
retype(byte_offset(dest, done * 4), BRW_TYPE_UD);
if (is_store) {
@ -7377,10 +7377,19 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
retype(byte_offset(src, done * 4), BRW_TYPE_UD);
}
inst = ubld.emit(opcode, dst_offset, srcs, MEMORY_LOGICAL_NUM_SRCS);
inst->has_no_mask_send_params = no_mask_handle;
mem = ubld.emit(opcode, dst_offset, srcs, MEMORY_LOGICAL_NUM_SRCS)->as_mem();
mem->has_no_mask_send_params = no_mask_handle;
if (is_load)
inst->size_written = block_bytes;
mem->size_written = block_bytes;
mem->lsc_op = op;
mem->mode = *mode;
mem->binding_type = *binding_type;
mem->address_offset = address_offset;
mem->coord_components = coord_components;
mem->data_size = data_size;
mem->components = block_comps;
mem->alignment = align;
mem->flags = flags;
if (brw_type_size_bits(srcs[MEMORY_LOGICAL_ADDRESS].type) == 64) {
increment_a64_address(ubld, srcs[MEMORY_LOGICAL_ADDRESS],

View file

@ -15,6 +15,7 @@ static inline unsigned
brw_inst_kind_size(brw_inst_kind kind)
{
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_tex_inst));
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_mem_inst));
/* TODO: Temporarily here to ensure all instructions can be converted to
* SEND. Once all new kinds are added, change so that BASE allocate only
@ -178,6 +179,11 @@ brw_inst_kind_for_opcode(enum opcode opcode)
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
return BRW_KIND_TEX;
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
return BRW_KIND_MEM;
default:
return BRW_KIND_BASE;
}
@ -221,14 +227,6 @@ brw_inst::is_control_source(unsigned arg) const
case SHADER_OPCODE_SEND_GATHER:
return arg < SEND_SRC_PAYLOAD1;
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
return arg != MEMORY_LOGICAL_BINDING &&
arg != MEMORY_LOGICAL_ADDRESS &&
arg != MEMORY_LOGICAL_DATA0 &&
arg != MEMORY_LOGICAL_DATA1;
case SHADER_OPCODE_QUAD_SWAP:
case SHADER_OPCODE_INCLUSIVE_SCAN:
case SHADER_OPCODE_EXCLUSIVE_SCAN:
@ -492,13 +490,15 @@ brw_inst::components_read(unsigned i) const
if (i == MEMORY_LOGICAL_DATA1)
return 0;
FALLTHROUGH;
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: {
const brw_mem_inst *mem = as_mem();
if (i == MEMORY_LOGICAL_DATA0 || i == MEMORY_LOGICAL_DATA1)
return src[MEMORY_LOGICAL_COMPONENTS].ud;
return mem->components;
else if (i == MEMORY_LOGICAL_ADDRESS)
return src[MEMORY_LOGICAL_COORD_COMPONENTS].ud;
return mem->coord_components;
else
return 1;
}
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
return (i == 0 ? 2 : 1);
@ -946,8 +946,7 @@ brw_inst::is_volatile() const
case SHADER_OPCODE_LOAD_REG:
return true;
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
assert(sources > MEMORY_LOGICAL_FLAGS);
return src[MEMORY_LOGICAL_FLAGS].ud & MEMORY_FLAG_VOLATILE_ACCESS;
return as_mem()->flags & MEMORY_FLAG_VOLATILE_ACCESS;
case SHADER_OPCODE_SEND:
case SHADER_OPCODE_SEND_GATHER:
return as_send()->is_volatile;

View file

@ -43,6 +43,7 @@ enum ENUM_PACKED brw_inst_kind {
BRW_KIND_BASE,
BRW_KIND_SEND,
BRW_KIND_TEX,
BRW_KIND_MEM,
};
brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode);
@ -70,6 +71,7 @@ struct brw_inst : brw_exec_node {
KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND);
KIND_HELPERS(as_tex, brw_tex_inst, BRW_KIND_TEX);
KIND_HELPERS(as_mem, brw_mem_inst, BRW_KIND_MEM);
#undef KIND_HELPERS
@ -278,6 +280,22 @@ struct brw_tex_inst : brw_inst {
bool residency;
};
/**
 * Memory access instruction (loads, stores, atomics).
 *
 * Carries the parameters that were previously passed as the immediate
 * MEMORY_LOGICAL_* sources of SHADER_OPCODE_MEMORY_{LOAD,STORE,ATOMIC}_LOGICAL.
 */
struct brw_mem_inst : brw_inst {
/** LSC operation to perform (load/store/atomic variant) */
enum lsc_opcode lsc_op;
/** Memory mode: typed, untyped, shared-local, scratch, or constant */
enum memory_logical_mode mode;
/** How the surface state is located (e.g. BTI, SS, BSS, FLAT) */
enum lsc_addr_surface_type binding_type;
/** Bit-size of each data element, as enum lsc_data_size */
enum lsc_data_size data_size;
/** Dimensionality of the address source (number of coordinates) */
uint8_t coord_components;
/** Number of vector components accessed */
uint8_t components;
/** memory_flags bitfield (helpers/volatile/coherent/transpose) */
uint8_t flags;
/** Required alignment of address in bytes; 0 for natural alignment */
uint32_t alignment;
/** Xe2+: immediate offset added to the address; 0 when unused */
int32_t address_offset;
};
/**
* Make the execution of \p inst dependent on the evaluation of a possibly
* inverted predicate.

View file

@ -1478,48 +1478,35 @@ lsc_addr_size_for_type(enum brw_reg_type type)
}
static void
lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
{
const intel_device_info *devinfo = bld.shader->devinfo;
assert(devinfo->has_lsc);
assert(inst->src[MEMORY_LOGICAL_OPCODE].file == IMM);
assert(inst->src[MEMORY_LOGICAL_MODE].file == IMM);
assert(inst->src[MEMORY_LOGICAL_BINDING_TYPE].file == IMM);
assert(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].file == IMM);
assert(inst->src[MEMORY_LOGICAL_DATA_SIZE].file == IMM);
assert(inst->src[MEMORY_LOGICAL_FLAGS].file == IMM);
/* Get the logical send arguments. */
const enum lsc_opcode op = (lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud;
const enum memory_logical_mode mode =
(enum memory_logical_mode) inst->src[MEMORY_LOGICAL_MODE].ud;
const enum lsc_addr_surface_type binding_type =
(enum lsc_addr_surface_type) inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud;
const brw_reg binding = inst->src[MEMORY_LOGICAL_BINDING];
const brw_reg addr = inst->src[MEMORY_LOGICAL_ADDRESS];
const unsigned coord_components =
inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].ud;
enum lsc_data_size data_size =
(enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
const unsigned components = inst->src[MEMORY_LOGICAL_COMPONENTS].ud;
const enum memory_flags flags =
(enum memory_flags) inst->src[MEMORY_LOGICAL_FLAGS].ud;
const bool transpose = flags & MEMORY_FLAG_TRANSPOSE;
const bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
const bool volatile_access = flags & MEMORY_FLAG_VOLATILE_ACCESS;
const bool coherent_access = flags & MEMORY_FLAG_COHERENT_ACCESS;
const brw_reg data0 = inst->src[MEMORY_LOGICAL_DATA0];
const brw_reg data1 = inst->src[MEMORY_LOGICAL_DATA1];
const bool has_side_effects = inst->has_side_effects();
const brw_reg binding = mem->src[MEMORY_LOGICAL_BINDING];
const brw_reg addr = mem->src[MEMORY_LOGICAL_ADDRESS];
const brw_reg data0 = mem->src[MEMORY_LOGICAL_DATA0];
const brw_reg data1 = mem->src[MEMORY_LOGICAL_DATA1];
const enum lsc_opcode op = mem->lsc_op;
const enum memory_logical_mode mode = mem->mode;
const enum lsc_addr_surface_type binding_type = mem->binding_type;
const unsigned coord_components = mem->coord_components;
enum lsc_data_size data_size = mem->data_size;
const unsigned components = mem->components;
const bool transpose = mem->flags & MEMORY_FLAG_TRANSPOSE;
const bool include_helpers = mem->flags & MEMORY_FLAG_INCLUDE_HELPERS;
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
const bool coherent_access = mem->flags & MEMORY_FLAG_COHERENT_ACCESS;
const bool has_side_effects = mem->has_side_effects();
const uint32_t data_size_B = lsc_data_size_bytes(data_size);
const enum brw_reg_type data_type =
brw_type_with_size(data0.type, data_size_B * 8);
const enum lsc_addr_size addr_size = lsc_addr_size_for_type(addr.type);
assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].file == IMM);
const int32_t base_offset = inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d;
const int32_t base_offset = mem->address_offset;
/**
* TGM messages cannot have a base offset
@ -1530,9 +1517,9 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
brw_reg payload = addr;
if (addr.file != VGRF || !addr.is_contiguous()) {
if (inst->force_writemask_all) {
if (mem->force_writemask_all) {
const brw_builder dbld =
inst->exec_size == 1 ?
mem->exec_size == 1 ?
bld.scalar_group() :
bld.group(bld.shader->dispatch_width, 0);
payload = dbld.move_to_vgrf(addr, coord_components);
@ -1565,7 +1552,7 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
payload2 = bld.vgrf(data0.type, size);
bld.LOAD_PAYLOAD(payload2, data, size, 0);
ex_mlen = (size * brw_type_size_bytes(data_type) * inst->exec_size) / REG_SIZE;
ex_mlen = (size * brw_type_size_bytes(data_type) * mem->exec_size) / REG_SIZE;
}
}
@ -1616,13 +1603,13 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
*/
if (bld.shader->stage == MESA_SHADER_FRAGMENT && !transpose) {
if (include_helpers)
emit_predicate_on_vector_mask(bld, inst);
emit_predicate_on_vector_mask(bld, mem);
else if (has_side_effects && mode != MEMORY_MODE_SCRATCH)
brw_emit_predicate_on_sample_mask(bld, inst);
brw_emit_predicate_on_sample_mask(bld, mem);
}
brw_send_inst *send = brw_transform_inst_to_send(bld, inst);
inst = NULL;
brw_send_inst *send = brw_transform_inst_to_send(bld, mem);
mem = NULL;
switch (mode) {
case MEMORY_MODE_UNTYPED:
@ -1704,42 +1691,29 @@ emit_a64_oword_block_header(const brw_builder &bld, const brw_reg &addr)
}
static void
lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
{
const intel_device_info *devinfo = bld.shader->devinfo;
const brw_compiler *compiler = bld.shader->compiler;
assert(inst->src[MEMORY_LOGICAL_OPCODE].file == IMM);
assert(inst->src[MEMORY_LOGICAL_MODE].file == IMM);
assert(inst->src[MEMORY_LOGICAL_BINDING_TYPE].file == IMM);
assert(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].file == IMM);
assert(inst->src[MEMORY_LOGICAL_DATA_SIZE].file == IMM);
assert(inst->src[MEMORY_LOGICAL_FLAGS].file == IMM);
assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].file == IMM);
/* Get the logical send arguments. */
const enum lsc_opcode op = (lsc_opcode)inst->src[MEMORY_LOGICAL_OPCODE].ud;
const enum memory_logical_mode mode =
(enum memory_logical_mode) inst->src[MEMORY_LOGICAL_MODE].ud;
enum lsc_addr_surface_type binding_type =
(enum lsc_addr_surface_type) inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud;
brw_reg binding = inst->src[MEMORY_LOGICAL_BINDING];
const brw_reg addr = inst->src[MEMORY_LOGICAL_ADDRESS];
const unsigned coord_components =
inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].ud;
const unsigned alignment = inst->src[MEMORY_LOGICAL_ALIGNMENT].ud;
const unsigned components = inst->src[MEMORY_LOGICAL_COMPONENTS].ud;
const enum memory_flags flags =
(enum memory_flags) inst->src[MEMORY_LOGICAL_FLAGS].ud;
const bool block = flags & MEMORY_FLAG_TRANSPOSE;
const bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
const bool volatile_access = flags & MEMORY_FLAG_VOLATILE_ACCESS;
const brw_reg data0 = inst->src[MEMORY_LOGICAL_DATA0];
const brw_reg data1 = inst->src[MEMORY_LOGICAL_DATA1];
const bool has_side_effects = inst->has_side_effects();
const bool has_dest = inst->dst.file != BAD_FILE && !inst->dst.is_null();
assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].file == IMM &&
inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d == 0);
brw_reg binding = mem->src[MEMORY_LOGICAL_BINDING];
const brw_reg addr = mem->src[MEMORY_LOGICAL_ADDRESS];
const brw_reg data0 = mem->src[MEMORY_LOGICAL_DATA0];
const brw_reg data1 = mem->src[MEMORY_LOGICAL_DATA1];
const enum lsc_opcode op = mem->lsc_op;
const enum memory_logical_mode mode = mem->mode;
enum lsc_addr_surface_type binding_type = mem->binding_type;
const unsigned coord_components = mem->coord_components;
const unsigned alignment = mem->alignment;
const unsigned components = mem->components;
const bool block = mem->flags & MEMORY_FLAG_TRANSPOSE;
const bool include_helpers = mem->flags & MEMORY_FLAG_INCLUDE_HELPERS;
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
const bool has_side_effects = mem->has_side_effects();
const bool has_dest = mem->dst.file != BAD_FILE && !mem->dst.is_null();
assert(mem->address_offset == 0);
/* Don't predicate scratch writes on the sample mask. Otherwise,
* FS helper invocations would load undefined values from scratch memory.
@ -1749,8 +1723,7 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
*/
bool allow_sample_mask = has_side_effects && mode != MEMORY_MODE_SCRATCH;
const enum lsc_data_size data_size =
(enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
const enum lsc_data_size data_size = mem->data_size;
/* unpadded data size */
const uint32_t data_bit_size =
@ -1801,10 +1774,10 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
*/
if (bld.shader->stage == MESA_SHADER_FRAGMENT) {
if (include_helpers)
emit_predicate_on_vector_mask(bld, inst);
emit_predicate_on_vector_mask(bld, mem);
else if (allow_sample_mask &&
(header.file == BAD_FILE || !surface_access))
brw_emit_predicate_on_sample_mask(bld, inst);
brw_emit_predicate_on_sample_mask(bld, mem);
}
brw_reg payload, payload2;
@ -1870,12 +1843,12 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
sfid = BRW_SFID_HDC1;
if (lsc_opcode_is_atomic(op)) {
desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group,
desc = brw_dp_typed_atomic_desc(devinfo, mem->exec_size, mem->group,
lsc_op_to_legacy_atomic(op),
has_dest);
} else {
desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size,
inst->group, components, !has_dest);
desc = brw_dp_typed_surface_rw_desc(devinfo, mem->exec_size,
mem->group, components, !has_dest);
}
} else if (mode == MEMORY_MODE_CONSTANT) {
assert(block); /* non-block loads not yet handled */
@ -1891,11 +1864,11 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
if (lsc_opcode_is_atomic(op)) {
unsigned aop = lsc_op_to_legacy_atomic(op);
if (lsc_opcode_is_atomic_float(op)) {
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, mem->exec_size,
data_bit_size, aop,
has_dest);
} else {
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size,
desc = brw_dp_a64_untyped_atomic_desc(devinfo, mem->exec_size,
data_bit_size, aop,
has_dest);
}
@ -1903,10 +1876,10 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
desc = brw_dp_a64_oword_block_rw_desc(devinfo, oword_aligned,
components, !has_dest);
} else if (byte_scattered) {
desc = brw_dp_a64_byte_scattered_rw_desc(devinfo, inst->exec_size,
desc = brw_dp_a64_byte_scattered_rw_desc(devinfo, mem->exec_size,
data_bit_size, !has_dest);
} else {
desc = brw_dp_a64_untyped_surface_rw_desc(devinfo, inst->exec_size,
desc = brw_dp_a64_untyped_surface_rw_desc(devinfo, mem->exec_size,
components, !has_dest);
}
} else {
@ -1917,31 +1890,31 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
if (lsc_opcode_is_atomic(op)) {
unsigned aop = lsc_op_to_legacy_atomic(op);
if (lsc_opcode_is_atomic_float(op)) {
desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size,
desc = brw_dp_untyped_atomic_float_desc(devinfo, mem->exec_size,
aop, has_dest);
} else {
desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size,
desc = brw_dp_untyped_atomic_desc(devinfo, mem->exec_size,
aop, has_dest);
}
} else if (block) {
desc = brw_dp_oword_block_rw_desc(devinfo, oword_aligned,
components, !has_dest);
} else if (byte_scattered) {
desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
desc = brw_dp_byte_scattered_rw_desc(devinfo, mem->exec_size,
data_bit_size, !has_dest);
} else if (dword_scattered) {
desc = brw_dp_dword_scattered_rw_desc(devinfo, inst->exec_size,
desc = brw_dp_dword_scattered_rw_desc(devinfo, mem->exec_size,
!has_dest);
} else {
desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
desc = brw_dp_untyped_surface_rw_desc(devinfo, mem->exec_size,
components, !has_dest);
}
}
assert(sfid);
brw_send_inst *send = brw_transform_inst_to_send(bld, inst);
inst = NULL;
brw_send_inst *send = brw_transform_inst_to_send(bld, mem);
mem = NULL;
send->sfid = sfid;
send->mlen = mlen;
@ -2682,14 +2655,15 @@ brw_lower_logical_sends(brw_shader &s)
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: {
brw_mem_inst *mem = inst->as_mem();
if (devinfo->ver >= 20 ||
(devinfo->has_lsc &&
inst->src[MEMORY_LOGICAL_MODE].ud != MEMORY_MODE_TYPED))
lower_lsc_memory_logical_send(ibld, inst);
(devinfo->has_lsc && mem->mode != MEMORY_MODE_TYPED))
lower_lsc_memory_logical_send(ibld, mem);
else
lower_hdc_memory_logical_send(ibld, inst);
lower_hdc_memory_logical_send(ibld, mem);
break;
}
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
if (devinfo->has_lsc && !s.compiler->indirect_ubos_use_sampler)

View file

@ -376,21 +376,23 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst)
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
if (devinfo->ver >= 20)
return inst->exec_size;
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: {
const brw_mem_inst *mem = inst->as_mem();
if (inst->src[MEMORY_LOGICAL_MODE].ud == MEMORY_MODE_TYPED)
if (devinfo->ver >= 20)
return mem->exec_size;
if (mem->mode == MEMORY_MODE_TYPED)
return 8;
/* HDC A64 atomics are limited to SIMD8 */
if (!devinfo->has_lsc &&
inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT
&& lsc_opcode_is_atomic((enum lsc_opcode)
inst->src[MEMORY_LOGICAL_OPCODE].ud))
mem->binding_type == LSC_ADDR_SURFTYPE_FLAT &&
lsc_opcode_is_atomic(mem->lsc_op))
return 8;
return MIN2(16, inst->exec_size);
return MIN2(16, mem->exec_size);
}
/* On gfx12 parameters are fixed to 16-bit values and therefore they all
* always fit regardless of the execution size.

View file

@ -131,7 +131,7 @@ is_expression(const brw_shader *v, const brw_inst *const inst)
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
return true;
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
return inst->src[MEMORY_LOGICAL_MODE].ud == MEMORY_MODE_CONSTANT;
return inst->as_mem()->mode == MEMORY_MODE_CONSTANT;
case SHADER_OPCODE_LOAD_PAYLOAD:
return !is_coalescing_payload(*v, inst);
case SHADER_OPCODE_SEND:
@ -259,6 +259,20 @@ tex_inst_match(brw_tex_inst *a, brw_tex_inst *b)
a->residency == b->residency;
}
static bool
mem_inst_match(brw_mem_inst *a, brw_mem_inst *b)
{
return a->lsc_op == b->lsc_op &&
a->mode == b->mode &&
a->binding_type == b->binding_type &&
a->data_size == b->data_size &&
a->coord_components == b->coord_components &&
a->components == b->components &&
a->flags == b->flags &&
a->alignment == b->alignment &&
a->address_offset == b->address_offset;
}
static bool
instructions_match(brw_inst *a, brw_inst *b, bool *negate)
{
@ -267,6 +281,7 @@ instructions_match(brw_inst *a, brw_inst *b, bool *negate)
/* `kind` is derived from opcode, so skipped. */
(a->kind != BRW_KIND_SEND || send_inst_match(a->as_send(), b->as_send())) &&
(a->kind != BRW_KIND_TEX || tex_inst_match(a->as_tex(), b->as_tex())) &&
(a->kind != BRW_KIND_MEM || mem_inst_match(a->as_mem(), b->as_mem())) &&
a->exec_size == b->exec_size &&
a->group == b->group &&
a->predicate == b->predicate &&
@ -362,6 +377,26 @@ hash_inst(const void *v)
break;
}
case BRW_KIND_MEM: {
const brw_mem_inst *mem = inst->as_mem();
const uint8_t mem_u8data[] = {
mem->lsc_op,
mem->mode,
mem->binding_type,
mem->data_size,
mem->coord_components,
mem->components,
mem->flags,
};
const uint32_t mem_u32data[] = {
(uint32_t)mem->address_offset,
mem->alignment,
};
hash = HASH(hash, mem_u8data);
hash = HASH(hash, mem_u32data);
break;
}
case BRW_KIND_BASE:
/* Nothing else to do. */
break;

View file

@ -321,61 +321,17 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
static bool
print_memory_logical_source(FILE *file, const brw_inst *inst, unsigned i)
{
if (inst->is_control_source(i)) {
assert(inst->src[i].file == IMM &&
(inst->src[i].type == BRW_TYPE_UD ||
inst->src[i].type == BRW_TYPE_D));
assert(!inst->src[i].negate);
assert(!inst->src[i].abs);
}
switch (i) {
case MEMORY_LOGICAL_OPCODE:
fprintf(file, " %s", brw_lsc_op_to_string(inst->src[i].ud));
return true;
case MEMORY_LOGICAL_MODE: {
static const char *modes[] = {
[MEMORY_MODE_TYPED] = "typed",
[MEMORY_MODE_UNTYPED] = "untyped",
[MEMORY_MODE_SHARED_LOCAL] = "shared",
[MEMORY_MODE_SCRATCH] = "scratch",
[MEMORY_MODE_CONSTANT] = "const",
};
assert(inst->src[i].ud < ARRAY_SIZE(modes));
fprintf(file, " %s", modes[inst->src[i].ud]);
return true;
}
case MEMORY_LOGICAL_BINDING_TYPE:
fprintf(file, " %s", brw_lsc_addr_surftype_to_string(inst->src[i].ud));
if (inst->src[i].ud != LSC_ADDR_SURFTYPE_FLAT)
case MEMORY_LOGICAL_BINDING: {
lsc_addr_surface_type binding_type = inst->as_mem()->binding_type;
fprintf(file, " %s", brw_lsc_addr_surftype_to_string(binding_type));
if (binding_type != LSC_ADDR_SURFTYPE_FLAT)
fprintf(file, ":");
return true;
case MEMORY_LOGICAL_BINDING:
return inst->src[i].file == BAD_FILE;
}
case MEMORY_LOGICAL_ADDRESS:
fprintf(file, " addr: ");
return false;
case MEMORY_LOGICAL_ADDRESS_OFFSET:
fprintf(file, " offset: ");
return false;
case MEMORY_LOGICAL_COORD_COMPONENTS:
fprintf(file, " coord_comps:");
return false;
case MEMORY_LOGICAL_ALIGNMENT:
fprintf(file, " align:");
return false;
case MEMORY_LOGICAL_DATA_SIZE:
fprintf(file, " %s", brw_lsc_data_size_to_string(inst->src[i].ud));
return true;
case MEMORY_LOGICAL_COMPONENTS:
fprintf(file, " comps:");
return false;
case MEMORY_LOGICAL_FLAGS:
if (inst->src[i].ud & MEMORY_FLAG_TRANSPOSE)
fprintf(file, " transpose");
if (inst->src[i].ud & MEMORY_FLAG_INCLUDE_HELPERS)
fprintf(file, " helpers");
return true;
case MEMORY_LOGICAL_DATA0:
fprintf(file, " data0: ");
return false;
@ -499,10 +455,37 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con
fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type));
}
const brw_mem_inst *mem = inst->as_mem();
if (mem) {
fprintf(file, " %s", brw_lsc_op_to_string(mem->lsc_op));
static const char *modes[] = {
[MEMORY_MODE_TYPED] = "typed",
[MEMORY_MODE_UNTYPED] = "untyped",
[MEMORY_MODE_SHARED_LOCAL] = "shared",
[MEMORY_MODE_SCRATCH] = "scratch",
[MEMORY_MODE_CONSTANT] = "const",
};
assert(mem->mode < ARRAY_SIZE(modes));
fprintf(file, " %s", modes[mem->mode]);
fprintf(file, " offset: %dd", mem->address_offset);
fprintf(file, " coord_comps: %uu", mem->coord_components);
fprintf(file, " %s", brw_lsc_data_size_to_string(mem->data_size));
fprintf(file, " comps: %uu", mem->components);
fprintf(file, " align: %uu", mem->alignment);
if (mem->flags & MEMORY_FLAG_TRANSPOSE)
fprintf(file, " transpose");
if (mem->flags & MEMORY_FLAG_INCLUDE_HELPERS)
fprintf(file, " helpers");
if (mem->flags & MEMORY_FLAG_VOLATILE_ACCESS)
fprintf(file, " volatile");
if (mem->flags & MEMORY_FLAG_COHERENT_ACCESS)
fprintf(file, " coherent");
}
for (int i = 0; i < inst->sources; i++) {
if (inst->opcode == SHADER_OPCODE_MEMORY_LOAD_LOGICAL ||
inst->opcode == SHADER_OPCODE_MEMORY_STORE_LOGICAL ||
inst->opcode == SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL) {
if (mem) {
if (print_memory_logical_source(file, inst, i))
continue;
} else {

View file

@ -90,36 +90,18 @@ is_ud_imm(const brw_reg &reg)
return reg.file == IMM && reg.type == BRW_TYPE_UD;
}
static inline bool
is_d_imm(const brw_reg &reg)
{
return reg.file == IMM && reg.type == BRW_TYPE_D;
}
static void
validate_memory_logical(const brw_shader &s, const brw_inst *inst)
validate_memory_logical(const brw_shader &s, const brw_mem_inst *inst)
{
const intel_device_info *devinfo = s.devinfo;
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_OPCODE]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_MODE]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_BINDING_TYPE]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_ALIGNMENT]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_DATA_SIZE]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_COMPONENTS]));
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_FLAGS]));
VAL_ASSERT(is_d_imm(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET]));
enum lsc_opcode op = (enum lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud;
enum memory_flags flags = (memory_flags)inst->src[MEMORY_LOGICAL_FLAGS].ud;
enum lsc_opcode op = inst->lsc_op;
enum memory_flags flags = (memory_flags)inst->flags;
bool transpose = flags & MEMORY_FLAG_TRANSPOSE;
bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
enum memory_logical_mode mode =
(memory_logical_mode)inst->src[MEMORY_LOGICAL_MODE].ud;
enum memory_logical_mode mode = inst->mode;
enum lsc_data_size data_size =
(enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
enum lsc_data_size data_size = inst->data_size;
unsigned data_size_B = lsc_data_size_bytes(data_size);
if (!devinfo->has_lsc) {
@ -131,18 +113,18 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst)
if (transpose) {
const unsigned min_alignment =
mode == MEMORY_MODE_SHARED_LOCAL ? 16 : 4;
VAL_ASSERT_GE(inst->src[MEMORY_LOGICAL_ALIGNMENT].ud, min_alignment);
VAL_ASSERT_GE(inst->alignment, min_alignment);
}
}
VAL_ASSERT(!transpose || !include_helpers);
VAL_ASSERT(!transpose || lsc_opcode_has_transpose(op));
if (inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT)
if (inst->binding_type == LSC_ADDR_SURFTYPE_FLAT)
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_BINDING].file, BAD_FILE);
if (inst->src[MEMORY_LOGICAL_DATA1].file != BAD_FILE) {
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud,
VAL_ASSERT_EQ(inst->components,
inst->components_read(MEMORY_LOGICAL_DATA1));
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_DATA0].type,
@ -150,7 +132,7 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst)
}
if (inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE) {
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud,
VAL_ASSERT_EQ(inst->components,
inst->components_read(MEMORY_LOGICAL_DATA0));
VAL_ASSERT_EQ(brw_type_size_bytes(inst->src[MEMORY_LOGICAL_DATA0].type),
@ -162,10 +144,10 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst)
/** TGM messages cannot have a base offset */
if (mode == MEMORY_MODE_TYPED)
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d, 0);
VAL_ASSERT_EQ(inst->as_mem()->address_offset, 0);
/* Offset must be DWord aligned */
VAL_ASSERT_EQ((inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d % 4), 0);
VAL_ASSERT_EQ((inst->as_mem()->address_offset % 4), 0);
switch (inst->opcode) {
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
@ -185,7 +167,7 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst)
(lsc_op_num_data_values(op) < 1));
VAL_ASSERT_EQ((inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE),
(lsc_op_num_data_values(op) < 2));
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud, 1);
VAL_ASSERT_EQ(inst->components, 1);
VAL_ASSERT(!include_helpers);
break;
default:
@ -336,7 +318,7 @@ brw_validate(const brw_shader &s)
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
validate_memory_logical(s, inst);
validate_memory_logical(s, inst->as_mem());
break;
case SHADER_OPCODE_MEMORY_FENCE: