mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-01 20:30:12 +01:00
brw: Add brw_mem_inst
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36730>
This commit is contained in:
parent
f0f1e63f99
commit
09a26526cc
9 changed files with 280 additions and 305 deletions
|
|
@ -671,7 +671,7 @@ enum get_buffer_size_srcs {
|
|||
GET_BUFFER_SIZE_SRCS
|
||||
};
|
||||
|
||||
enum memory_logical_mode {
|
||||
enum ENUM_PACKED memory_logical_mode {
|
||||
MEMORY_MODE_TYPED,
|
||||
MEMORY_MODE_UNTYPED,
|
||||
MEMORY_MODE_SHARED_LOCAL,
|
||||
|
|
@ -680,17 +680,8 @@ enum memory_logical_mode {
|
|||
};
|
||||
|
||||
enum memory_logical_srcs {
|
||||
/** enum lsc_opcode (as UD immediate) */
|
||||
MEMORY_LOGICAL_OPCODE,
|
||||
|
||||
/** enum memory_logical_mode (as UD immediate) */
|
||||
MEMORY_LOGICAL_MODE,
|
||||
|
||||
/** enum lsc_addr_surface_type (as UD immediate) */
|
||||
MEMORY_LOGICAL_BINDING_TYPE,
|
||||
|
||||
/**
|
||||
* Where to find the surface state. Depends on BINDING_TYPE above:
|
||||
* Where to find the surface state. Depends on brw_mem_inst::binding_type:
|
||||
*
|
||||
* - SS: pointer to surface state (relative to surface base address)
|
||||
* - BSS: pointer to surface state (relative to bindless surface base)
|
||||
|
|
@ -702,24 +693,6 @@ enum memory_logical_srcs {
|
|||
/** Coordinate/address/offset for where to access memory */
|
||||
MEMORY_LOGICAL_ADDRESS,
|
||||
|
||||
/** Xe2+: offset for where to access memory (as UD immediate) */
|
||||
MEMORY_LOGICAL_ADDRESS_OFFSET,
|
||||
|
||||
/** Dimensionality of the "address" source (as UD immediate) */
|
||||
MEMORY_LOGICAL_COORD_COMPONENTS,
|
||||
|
||||
/** Required alignment of address in bytes; 0 for natural alignment */
|
||||
MEMORY_LOGICAL_ALIGNMENT,
|
||||
|
||||
/** Bit-size in the form of enum lsc_data_size (as UD immediate) */
|
||||
MEMORY_LOGICAL_DATA_SIZE,
|
||||
|
||||
/** Number of vector components (as UD immediate) */
|
||||
MEMORY_LOGICAL_COMPONENTS,
|
||||
|
||||
/** memory_flags bitfield (as UD immediate) */
|
||||
MEMORY_LOGICAL_FLAGS,
|
||||
|
||||
/** Data to write for stores or the first operand for atomics */
|
||||
MEMORY_LOGICAL_DATA0,
|
||||
|
||||
|
|
@ -1583,7 +1556,7 @@ enum ENUM_PACKED brw_rnd_mode {
|
|||
* one or two messages with different bits to control things like address
|
||||
* size, how much data is read/written, etc.
|
||||
*/
|
||||
enum lsc_opcode {
|
||||
enum ENUM_PACKED lsc_opcode {
|
||||
LSC_OP_LOAD = 0,
|
||||
LSC_OP_LOAD_CMASK = 2,
|
||||
LSC_OP_STORE = 4,
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@
|
|||
#include "util/bitscan.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
|
||||
#include <optional>
|
||||
|
||||
struct brw_bind_info {
|
||||
bool valid;
|
||||
bool bindless;
|
||||
|
|
@ -4617,42 +4619,42 @@ can_use_instruction_offset(enum lsc_addr_surface_type binding_type, int32_t offs
|
|||
return offset >= u_intN_min(max_bits) && offset <= u_intN_max(max_bits);
|
||||
}
|
||||
|
||||
static void
|
||||
set_memory_address(nir_to_brw_state &ntb,
|
||||
const brw_builder &bld,
|
||||
nir_intrinsic_instr *instr,
|
||||
brw_reg *srcs)
|
||||
static brw_reg
|
||||
memory_address(nir_to_brw_state &ntb,
|
||||
const brw_builder &bld,
|
||||
nir_intrinsic_instr *instr,
|
||||
enum lsc_addr_surface_type binding_type,
|
||||
int32_t *address_offset)
|
||||
{
|
||||
const intel_device_info *devinfo = ntb.devinfo;
|
||||
const nir_src *nir_src_offset = nir_get_io_offset_src(instr);
|
||||
const brw_reg src_offset = get_nir_src_imm(ntb, *nir_src_offset);
|
||||
const enum lsc_addr_surface_type binding_type =
|
||||
(enum lsc_addr_surface_type) srcs[MEMORY_LOGICAL_BINDING_TYPE].ud;
|
||||
const brw_builder ubld = src_offset.is_scalar ? bld.scalar_group() : bld;
|
||||
brw_reg address;
|
||||
|
||||
if (devinfo->ver < 20 ||
|
||||
(!nir_intrinsic_has_base(instr) && !nir_src_is_const(*nir_src_offset))) {
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] =
|
||||
address =
|
||||
nir_intrinsic_has_base(instr) ?
|
||||
ubld.ADD(src_offset,
|
||||
brw_imm_int(src_offset.type, nir_intrinsic_base(instr))) :
|
||||
src_offset;
|
||||
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0);
|
||||
*address_offset = 0;
|
||||
} else if (!nir_intrinsic_has_base(instr) && nir_src_is_const(*nir_src_offset)) {
|
||||
const int32_t offset = nir_src_as_int(*nir_src_offset);
|
||||
if (can_use_instruction_offset(binding_type, offset)) {
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] = brw_imm_ud(0);
|
||||
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(offset);
|
||||
address = brw_imm_ud(0);
|
||||
*address_offset = offset;
|
||||
} else {
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] = src_offset;
|
||||
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0);
|
||||
address = src_offset;
|
||||
*address_offset = 0;
|
||||
}
|
||||
} else {
|
||||
assert(nir_intrinsic_has_base(instr));
|
||||
const int32_t offset = nir_intrinsic_base(instr);
|
||||
assert(can_use_instruction_offset(binding_type, offset));
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] = src_offset;
|
||||
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(offset);
|
||||
address = src_offset;
|
||||
*address_offset = offset;
|
||||
}
|
||||
|
||||
/* If nir_src is_scalar, the MEMORY_LOGICAL_ADDRESS will be allocated at
|
||||
|
|
@ -4661,7 +4663,9 @@ set_memory_address(nir_to_brw_state &ntb,
|
|||
* properly also ensures that emit_uniformize (below) will handle the value
|
||||
* as scalar_group() size instead of full dispatch width.
|
||||
*/
|
||||
srcs[MEMORY_LOGICAL_ADDRESS].is_scalar = src_offset.is_scalar;
|
||||
address.is_scalar = src_offset.is_scalar;
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
|
@ -4757,22 +4761,20 @@ brw_from_nir_emit_cs_intrinsic(nir_to_brw_state &ntb,
|
|||
cs_prog_data->uses_num_work_groups = true;
|
||||
|
||||
brw_reg srcs[MEMORY_LOGICAL_NUM_SRCS];
|
||||
srcs[MEMORY_LOGICAL_OPCODE] = brw_imm_ud(LSC_OP_LOAD);
|
||||
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_UNTYPED);
|
||||
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BTI);
|
||||
srcs[MEMORY_LOGICAL_BINDING] = brw_imm_ud(0);
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] = brw_imm_ud(0);
|
||||
srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud(1);
|
||||
srcs[MEMORY_LOGICAL_ALIGNMENT] = brw_imm_ud(4);
|
||||
srcs[MEMORY_LOGICAL_DATA_SIZE] = brw_imm_ud(LSC_DATA_SIZE_D32);
|
||||
srcs[MEMORY_LOGICAL_COMPONENTS] = brw_imm_ud(3);
|
||||
srcs[MEMORY_LOGICAL_FLAGS] = brw_imm_ud(0);
|
||||
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0);
|
||||
|
||||
brw_inst *inst =
|
||||
brw_mem_inst *mem =
|
||||
bld.emit(SHADER_OPCODE_MEMORY_LOAD_LOGICAL,
|
||||
dest, srcs, MEMORY_LOGICAL_NUM_SRCS);
|
||||
inst->size_written = 3 * s.dispatch_width * 4;
|
||||
dest, srcs, MEMORY_LOGICAL_NUM_SRCS)->as_mem();
|
||||
mem->size_written = 3 * s.dispatch_width * 4;
|
||||
mem->lsc_op = LSC_OP_LOAD;
|
||||
mem->mode = MEMORY_MODE_UNTYPED;
|
||||
mem->binding_type = LSC_ADDR_SURFTYPE_BTI;
|
||||
mem->data_size = LSC_DATA_SIZE_D32;
|
||||
mem->coord_components = 1;
|
||||
mem->components = 3;
|
||||
mem->alignment = 4;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -7090,30 +7092,26 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
(nir_intrinsic_access(instr) & ACCESS_COHERENT);
|
||||
const unsigned align =
|
||||
nir_intrinsic_has_align(instr) ? nir_intrinsic_align(instr) : 0;
|
||||
const unsigned logical_flags =
|
||||
uint8_t flags =
|
||||
(include_helpers ? MEMORY_FLAG_INCLUDE_HELPERS : 0) |
|
||||
(volatile_access ? MEMORY_FLAG_VOLATILE_ACCESS : 0) |
|
||||
(coherent_access ? MEMORY_FLAG_COHERENT_ACCESS : 0);
|
||||
bool no_mask_handle = false;
|
||||
int data_src = -1;
|
||||
|
||||
srcs[MEMORY_LOGICAL_OPCODE] = brw_imm_ud(op);
|
||||
/* BINDING_TYPE, BINDING, and ADDRESS are handled in the switch */
|
||||
srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud(1);
|
||||
srcs[MEMORY_LOGICAL_ALIGNMENT] = brw_imm_ud(align);
|
||||
/* DATA_SIZE and CHANNELS are handled below the switch */
|
||||
srcs[MEMORY_LOGICAL_FLAGS] = brw_imm_ud(logical_flags);
|
||||
/* DATA0 and DATA1 are handled below */
|
||||
uint8_t coord_components = 1;
|
||||
|
||||
/* Set the default address offset to 0 */
|
||||
srcs[MEMORY_LOGICAL_ADDRESS_OFFSET] = brw_imm_d(0);
|
||||
int32_t address_offset = 0;
|
||||
|
||||
std::optional<memory_logical_mode> mode;
|
||||
std::optional<lsc_addr_surface_type> binding_type;
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_bindless_image_load:
|
||||
case nir_intrinsic_bindless_image_store:
|
||||
case nir_intrinsic_bindless_image_atomic:
|
||||
case nir_intrinsic_bindless_image_atomic_swap:
|
||||
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BSS);
|
||||
binding_type = LSC_ADDR_SURFTYPE_BSS;
|
||||
FALLTHROUGH;
|
||||
case nir_intrinsic_image_load:
|
||||
case nir_intrinsic_image_store:
|
||||
|
|
@ -7127,22 +7125,22 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
* message. For TGM it will be 4 (U, V, R, SAMPLE_INDEX)."
|
||||
*
|
||||
*/
|
||||
srcs[MEMORY_LOGICAL_COORD_COMPONENTS] = brw_imm_ud(
|
||||
coord_components =
|
||||
(devinfo->ver >= 30 &&
|
||||
nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_MS) ? 4 :
|
||||
nir_image_intrinsic_coord_components(instr));
|
||||
nir_image_intrinsic_coord_components(instr);
|
||||
|
||||
/* MSAA image atomic accesses not supported, must be lowered to UGM */
|
||||
assert((instr->intrinsic != nir_intrinsic_bindless_image_atomic &&
|
||||
instr->intrinsic != nir_intrinsic_bindless_image_atomic_swap) ||
|
||||
nir_intrinsic_image_dim(instr) != GLSL_SAMPLER_DIM_MS);
|
||||
|
||||
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_TYPED);
|
||||
mode = MEMORY_MODE_TYPED;
|
||||
srcs[MEMORY_LOGICAL_BINDING] =
|
||||
get_nir_image_intrinsic_image(ntb, bld, instr);
|
||||
|
||||
if (srcs[MEMORY_LOGICAL_BINDING_TYPE].file == BAD_FILE)
|
||||
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_BTI);
|
||||
if (!binding_type.has_value())
|
||||
binding_type = LSC_ADDR_SURFTYPE_BTI;
|
||||
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] = get_nir_src(ntb, instr->src[1], 0);
|
||||
|
||||
|
|
@ -7150,7 +7148,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_ubo_uniform_block_intel:
|
||||
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_CONSTANT);
|
||||
mode = MEMORY_MODE_CONSTANT;
|
||||
FALLTHROUGH;
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_ssbo_intel:
|
||||
|
|
@ -7161,14 +7159,14 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
case nir_intrinsic_load_ssbo_block_intel:
|
||||
case nir_intrinsic_store_ssbo_block_intel:
|
||||
case nir_intrinsic_load_ssbo_uniform_block_intel:
|
||||
if (srcs[MEMORY_LOGICAL_MODE].file == BAD_FILE)
|
||||
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_UNTYPED);
|
||||
srcs[MEMORY_LOGICAL_BINDING_TYPE] =
|
||||
brw_imm_ud(get_nir_src_bindless(ntb, instr->src[is_store ? 1 : 0]) ?
|
||||
LSC_ADDR_SURFTYPE_BSS : LSC_ADDR_SURFTYPE_BTI);
|
||||
if (!mode.has_value())
|
||||
mode = MEMORY_MODE_UNTYPED;
|
||||
binding_type = get_nir_src_bindless(ntb, instr->src[is_store ? 1 : 0]) ?
|
||||
LSC_ADDR_SURFTYPE_BSS : LSC_ADDR_SURFTYPE_BTI;
|
||||
srcs[MEMORY_LOGICAL_BINDING] =
|
||||
get_nir_buffer_intrinsic_index(ntb, bld, instr, &no_mask_handle);
|
||||
set_memory_address(ntb, bld, instr, srcs);
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] =
|
||||
memory_address(ntb, bld, instr, *binding_type, &address_offset);
|
||||
data_src = is_atomic ? 2 : 0;
|
||||
break;
|
||||
case nir_intrinsic_load_shared:
|
||||
|
|
@ -7178,21 +7176,22 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
case nir_intrinsic_load_shared_block_intel:
|
||||
case nir_intrinsic_store_shared_block_intel:
|
||||
case nir_intrinsic_load_shared_uniform_block_intel: {
|
||||
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_SHARED_LOCAL);
|
||||
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_FLAT);
|
||||
set_memory_address(ntb, bld, instr, srcs);
|
||||
mode = MEMORY_MODE_SHARED_LOCAL;
|
||||
binding_type = LSC_ADDR_SURFTYPE_FLAT;
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] =
|
||||
memory_address(ntb, bld, instr, *binding_type, &address_offset);
|
||||
data_src = is_atomic ? 1 : 0;
|
||||
no_mask_handle = true;
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_scratch:
|
||||
case nir_intrinsic_store_scratch: {
|
||||
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_SCRATCH);
|
||||
mode = MEMORY_MODE_SCRATCH;
|
||||
|
||||
const nir_src &addr = instr->src[is_store ? 1 : 0];
|
||||
|
||||
if (devinfo->verx10 >= 125) {
|
||||
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_SS);
|
||||
binding_type = LSC_ADDR_SURFTYPE_SS;
|
||||
|
||||
const brw_builder ubld = bld.exec_all().group(8 * reg_unit(devinfo), 0);
|
||||
brw_reg bind = ubld.AND(retype(brw_vec1_grf(0, 5), BRW_TYPE_UD),
|
||||
|
|
@ -7214,8 +7213,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
/* load_scratch / store_scratch cannot be is_scalar yet. */
|
||||
assert(xbld.dispatch_width() == bld.dispatch_width());
|
||||
|
||||
srcs[MEMORY_LOGICAL_BINDING_TYPE] =
|
||||
brw_imm_ud(LSC_ADDR_SURFTYPE_FLAT);
|
||||
binding_type = LSC_ADDR_SURFTYPE_FLAT;
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] =
|
||||
swizzle_nir_scratch_addr(ntb, bld, addr, dword_aligned);
|
||||
}
|
||||
|
|
@ -7237,9 +7235,10 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
case nir_intrinsic_global_atomic_swap:
|
||||
case nir_intrinsic_load_global_block_intel:
|
||||
case nir_intrinsic_store_global_block_intel:
|
||||
srcs[MEMORY_LOGICAL_MODE] = brw_imm_ud(MEMORY_MODE_UNTYPED);
|
||||
srcs[MEMORY_LOGICAL_BINDING_TYPE] = brw_imm_ud(LSC_ADDR_SURFTYPE_FLAT);
|
||||
set_memory_address(ntb, bld, instr, srcs);
|
||||
mode = MEMORY_MODE_UNTYPED;
|
||||
binding_type = LSC_ADDR_SURFTYPE_FLAT;
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] =
|
||||
memory_address(ntb, bld, instr, *binding_type, &address_offset);
|
||||
data_src = is_atomic ? 1 : 0;
|
||||
no_mask_handle = srcs[MEMORY_LOGICAL_ADDRESS].is_scalar;
|
||||
break;
|
||||
|
|
@ -7253,15 +7252,11 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
if (components == 0)
|
||||
components = instr->num_components;
|
||||
|
||||
srcs[MEMORY_LOGICAL_COMPONENTS] = brw_imm_ud(components);
|
||||
|
||||
const unsigned nir_bit_size =
|
||||
is_store ? instr->src[data_src].ssa->bit_size : instr->def.bit_size;
|
||||
enum lsc_data_size data_size = lsc_bits_to_data_size(nir_bit_size);
|
||||
const enum lsc_data_size data_size = lsc_bits_to_data_size(nir_bit_size);
|
||||
uint32_t data_bit_size = lsc_data_size_bytes(data_size) * 8;
|
||||
|
||||
srcs[MEMORY_LOGICAL_DATA_SIZE] = brw_imm_ud(data_size);
|
||||
|
||||
const brw_reg_type data_type =
|
||||
brw_type_with_size(BRW_TYPE_UD, data_bit_size);
|
||||
const brw_reg_type nir_data_type =
|
||||
|
|
@ -7310,11 +7305,20 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
instr->intrinsic == nir_intrinsic_store_shared_block_intel ||
|
||||
instr->intrinsic == nir_intrinsic_store_ssbo_block_intel;
|
||||
|
||||
brw_inst *inst;
|
||||
brw_mem_inst *mem;
|
||||
|
||||
if (!block) {
|
||||
inst = xbld.emit(opcode, dest, srcs, MEMORY_LOGICAL_NUM_SRCS);
|
||||
inst->size_written *= components;
|
||||
mem = xbld.emit(opcode, dest, srcs, MEMORY_LOGICAL_NUM_SRCS)->as_mem();
|
||||
mem->size_written *= components;
|
||||
mem->lsc_op = op;
|
||||
mem->mode = *mode;
|
||||
mem->binding_type = *binding_type;
|
||||
mem->address_offset = address_offset;
|
||||
mem->coord_components = coord_components;
|
||||
mem->data_size = data_size;
|
||||
mem->components = components;
|
||||
mem->alignment = align;
|
||||
mem->flags = flags;
|
||||
|
||||
if (dest.file != BAD_FILE && data_bit_size > nir_bit_size) {
|
||||
/* Shrink e.g. D16U32 result back to D16 */
|
||||
|
|
@ -7326,8 +7330,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
} else {
|
||||
assert(nir_bit_size == 32);
|
||||
|
||||
srcs[MEMORY_LOGICAL_FLAGS] =
|
||||
brw_imm_ud(MEMORY_FLAG_TRANSPOSE | srcs[MEMORY_LOGICAL_FLAGS].ud);
|
||||
flags |= MEMORY_FLAG_TRANSPOSE;
|
||||
srcs[MEMORY_LOGICAL_ADDRESS] =
|
||||
bld.emit_uniformize(srcs[MEMORY_LOGICAL_ADDRESS]);
|
||||
|
||||
|
|
@ -7344,8 +7347,7 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
*/
|
||||
if (srcs[MEMORY_LOGICAL_ADDRESS].file == IMM &&
|
||||
align >= data_bit_size / 8 &&
|
||||
(devinfo->has_lsc ||
|
||||
srcs[MEMORY_LOGICAL_MODE].ud != MEMORY_MODE_SHARED_LOCAL)) {
|
||||
(devinfo->has_lsc || mode != MEMORY_MODE_SHARED_LOCAL)) {
|
||||
first_read_component = nir_def_first_component_read(&instr->def);
|
||||
unsigned last_component = nir_def_last_component_read(&instr->def);
|
||||
srcs[MEMORY_LOGICAL_ADDRESS].u64 +=
|
||||
|
|
@ -7368,8 +7370,6 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
block_comps = choose_block_size_dwords(devinfo, total - done);
|
||||
const unsigned block_bytes = block_comps * (nir_bit_size / 8);
|
||||
|
||||
srcs[MEMORY_LOGICAL_COMPONENTS] = brw_imm_ud(block_comps);
|
||||
|
||||
brw_reg dst_offset = is_store ? brw_reg() :
|
||||
retype(byte_offset(dest, done * 4), BRW_TYPE_UD);
|
||||
if (is_store) {
|
||||
|
|
@ -7377,10 +7377,19 @@ brw_from_nir_emit_memory_access(nir_to_brw_state &ntb,
|
|||
retype(byte_offset(src, done * 4), BRW_TYPE_UD);
|
||||
}
|
||||
|
||||
inst = ubld.emit(opcode, dst_offset, srcs, MEMORY_LOGICAL_NUM_SRCS);
|
||||
inst->has_no_mask_send_params = no_mask_handle;
|
||||
mem = ubld.emit(opcode, dst_offset, srcs, MEMORY_LOGICAL_NUM_SRCS)->as_mem();
|
||||
mem->has_no_mask_send_params = no_mask_handle;
|
||||
if (is_load)
|
||||
inst->size_written = block_bytes;
|
||||
mem->size_written = block_bytes;
|
||||
mem->lsc_op = op;
|
||||
mem->mode = *mode;
|
||||
mem->binding_type = *binding_type;
|
||||
mem->address_offset = address_offset;
|
||||
mem->coord_components = coord_components;
|
||||
mem->data_size = data_size;
|
||||
mem->components = block_comps;
|
||||
mem->alignment = align;
|
||||
mem->flags = flags;
|
||||
|
||||
if (brw_type_size_bits(srcs[MEMORY_LOGICAL_ADDRESS].type) == 64) {
|
||||
increment_a64_address(ubld, srcs[MEMORY_LOGICAL_ADDRESS],
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ static inline unsigned
|
|||
brw_inst_kind_size(brw_inst_kind kind)
|
||||
{
|
||||
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_tex_inst));
|
||||
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_mem_inst));
|
||||
|
||||
/* TODO: Temporarily here to ensure all instructions can be converted to
|
||||
* SEND. Once all new kinds are added, change so that BASE allocate only
|
||||
|
|
@ -178,6 +179,11 @@ brw_inst_kind_for_opcode(enum opcode opcode)
|
|||
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
|
||||
return BRW_KIND_TEX;
|
||||
|
||||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
|
||||
return BRW_KIND_MEM;
|
||||
|
||||
default:
|
||||
return BRW_KIND_BASE;
|
||||
}
|
||||
|
|
@ -221,14 +227,6 @@ brw_inst::is_control_source(unsigned arg) const
|
|||
case SHADER_OPCODE_SEND_GATHER:
|
||||
return arg < SEND_SRC_PAYLOAD1;
|
||||
|
||||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
|
||||
return arg != MEMORY_LOGICAL_BINDING &&
|
||||
arg != MEMORY_LOGICAL_ADDRESS &&
|
||||
arg != MEMORY_LOGICAL_DATA0 &&
|
||||
arg != MEMORY_LOGICAL_DATA1;
|
||||
|
||||
case SHADER_OPCODE_QUAD_SWAP:
|
||||
case SHADER_OPCODE_INCLUSIVE_SCAN:
|
||||
case SHADER_OPCODE_EXCLUSIVE_SCAN:
|
||||
|
|
@ -492,13 +490,15 @@ brw_inst::components_read(unsigned i) const
|
|||
if (i == MEMORY_LOGICAL_DATA1)
|
||||
return 0;
|
||||
FALLTHROUGH;
|
||||
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: {
|
||||
const brw_mem_inst *mem = as_mem();
|
||||
if (i == MEMORY_LOGICAL_DATA0 || i == MEMORY_LOGICAL_DATA1)
|
||||
return src[MEMORY_LOGICAL_COMPONENTS].ud;
|
||||
return mem->components;
|
||||
else if (i == MEMORY_LOGICAL_ADDRESS)
|
||||
return src[MEMORY_LOGICAL_COORD_COMPONENTS].ud;
|
||||
return mem->coord_components;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||
return (i == 0 ? 2 : 1);
|
||||
|
|
@ -946,8 +946,7 @@ brw_inst::is_volatile() const
|
|||
case SHADER_OPCODE_LOAD_REG:
|
||||
return true;
|
||||
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
|
||||
assert(sources > MEMORY_LOGICAL_FLAGS);
|
||||
return src[MEMORY_LOGICAL_FLAGS].ud & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||
return as_mem()->flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||
case SHADER_OPCODE_SEND:
|
||||
case SHADER_OPCODE_SEND_GATHER:
|
||||
return as_send()->is_volatile;
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ enum ENUM_PACKED brw_inst_kind {
|
|||
BRW_KIND_BASE,
|
||||
BRW_KIND_SEND,
|
||||
BRW_KIND_TEX,
|
||||
BRW_KIND_MEM,
|
||||
};
|
||||
|
||||
brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode);
|
||||
|
|
@ -70,6 +71,7 @@ struct brw_inst : brw_exec_node {
|
|||
|
||||
KIND_HELPERS(as_send, brw_send_inst, BRW_KIND_SEND);
|
||||
KIND_HELPERS(as_tex, brw_tex_inst, BRW_KIND_TEX);
|
||||
KIND_HELPERS(as_mem, brw_mem_inst, BRW_KIND_MEM);
|
||||
|
||||
#undef KIND_HELPERS
|
||||
|
||||
|
|
@ -278,6 +280,22 @@ struct brw_tex_inst : brw_inst {
|
|||
bool residency;
|
||||
};
|
||||
|
||||
/**
 * Instruction kind used for the logical memory opcodes
 * (SHADER_OPCODE_MEMORY_LOAD_LOGICAL, SHADER_OPCODE_MEMORY_STORE_LOGICAL and
 * SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL map to BRW_KIND_MEM).
 *
 * The fields below hold the message parameters that were previously passed
 * as UD/D immediate sources.  Register operands (binding, address, data0,
 * data1) are still read from src[] by the lowering code.
 */
struct brw_mem_inst : brw_inst {
|
||||
/** Memory operation to perform (formerly the MEMORY_LOGICAL_OPCODE source) */
enum lsc_opcode lsc_op;
|
||||
/** Memory mode, e.g. typed, untyped or shared-local */
enum memory_logical_mode mode;
|
||||
/** Selects how src[MEMORY_LOGICAL_BINDING] locates the surface state */
enum lsc_addr_surface_type binding_type;
|
||||
/** Bit-size of the data, in the form of enum lsc_data_size */
enum lsc_data_size data_size;
|
||||
|
||||
/** Dimensionality of the "address" source */
uint8_t coord_components;
|
||||
/** Number of vector components */
uint8_t components;
|
||||
/** memory_flags bitfield (MEMORY_FLAG_*) */
uint8_t flags;
|
||||
|
||||
/** Required alignment of address in bytes; 0 for natural alignment */
|
||||
uint32_t alignment;
|
||||
|
||||
/**
 * Xe2+: offset for where to access memory.  The HDC lowering path asserts
 * that this is 0.
 */
int32_t address_offset;
|
||||
};
|
||||
|
||||
/**
|
||||
* Make the execution of \p inst dependent on the evaluation of a possibly
|
||||
* inverted predicate.
|
||||
|
|
|
|||
|
|
@ -1478,48 +1478,35 @@ lsc_addr_size_for_type(enum brw_reg_type type)
|
|||
}
|
||||
|
||||
static void
|
||||
lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
||||
lower_lsc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
||||
{
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
assert(devinfo->has_lsc);
|
||||
|
||||
assert(inst->src[MEMORY_LOGICAL_OPCODE].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_MODE].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_BINDING_TYPE].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_DATA_SIZE].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_FLAGS].file == IMM);
|
||||
|
||||
/* Get the logical send arguments. */
|
||||
const enum lsc_opcode op = (lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud;
|
||||
const enum memory_logical_mode mode =
|
||||
(enum memory_logical_mode) inst->src[MEMORY_LOGICAL_MODE].ud;
|
||||
const enum lsc_addr_surface_type binding_type =
|
||||
(enum lsc_addr_surface_type) inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud;
|
||||
const brw_reg binding = inst->src[MEMORY_LOGICAL_BINDING];
|
||||
const brw_reg addr = inst->src[MEMORY_LOGICAL_ADDRESS];
|
||||
const unsigned coord_components =
|
||||
inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].ud;
|
||||
enum lsc_data_size data_size =
|
||||
(enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
|
||||
const unsigned components = inst->src[MEMORY_LOGICAL_COMPONENTS].ud;
|
||||
const enum memory_flags flags =
|
||||
(enum memory_flags) inst->src[MEMORY_LOGICAL_FLAGS].ud;
|
||||
const bool transpose = flags & MEMORY_FLAG_TRANSPOSE;
|
||||
const bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
|
||||
const bool volatile_access = flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||
const bool coherent_access = flags & MEMORY_FLAG_COHERENT_ACCESS;
|
||||
const brw_reg data0 = inst->src[MEMORY_LOGICAL_DATA0];
|
||||
const brw_reg data1 = inst->src[MEMORY_LOGICAL_DATA1];
|
||||
const bool has_side_effects = inst->has_side_effects();
|
||||
const brw_reg binding = mem->src[MEMORY_LOGICAL_BINDING];
|
||||
const brw_reg addr = mem->src[MEMORY_LOGICAL_ADDRESS];
|
||||
const brw_reg data0 = mem->src[MEMORY_LOGICAL_DATA0];
|
||||
const brw_reg data1 = mem->src[MEMORY_LOGICAL_DATA1];
|
||||
|
||||
const enum lsc_opcode op = mem->lsc_op;
|
||||
const enum memory_logical_mode mode = mem->mode;
|
||||
const enum lsc_addr_surface_type binding_type = mem->binding_type;
|
||||
const unsigned coord_components = mem->coord_components;
|
||||
enum lsc_data_size data_size = mem->data_size;
|
||||
const unsigned components = mem->components;
|
||||
const bool transpose = mem->flags & MEMORY_FLAG_TRANSPOSE;
|
||||
const bool include_helpers = mem->flags & MEMORY_FLAG_INCLUDE_HELPERS;
|
||||
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||
const bool coherent_access = mem->flags & MEMORY_FLAG_COHERENT_ACCESS;
|
||||
const bool has_side_effects = mem->has_side_effects();
|
||||
|
||||
const uint32_t data_size_B = lsc_data_size_bytes(data_size);
|
||||
const enum brw_reg_type data_type =
|
||||
brw_type_with_size(data0.type, data_size_B * 8);
|
||||
|
||||
const enum lsc_addr_size addr_size = lsc_addr_size_for_type(addr.type);
|
||||
assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].file == IMM);
|
||||
const int32_t base_offset = inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d;
|
||||
const int32_t base_offset = mem->address_offset;
|
||||
|
||||
/**
|
||||
* TGM messages cannot have a base offset
|
||||
|
|
@ -1530,9 +1517,9 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
brw_reg payload = addr;
|
||||
|
||||
if (addr.file != VGRF || !addr.is_contiguous()) {
|
||||
if (inst->force_writemask_all) {
|
||||
if (mem->force_writemask_all) {
|
||||
const brw_builder dbld =
|
||||
inst->exec_size == 1 ?
|
||||
mem->exec_size == 1 ?
|
||||
bld.scalar_group() :
|
||||
bld.group(bld.shader->dispatch_width, 0);
|
||||
payload = dbld.move_to_vgrf(addr, coord_components);
|
||||
|
|
@ -1565,7 +1552,7 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
|
||||
payload2 = bld.vgrf(data0.type, size);
|
||||
bld.LOAD_PAYLOAD(payload2, data, size, 0);
|
||||
ex_mlen = (size * brw_type_size_bytes(data_type) * inst->exec_size) / REG_SIZE;
|
||||
ex_mlen = (size * brw_type_size_bytes(data_type) * mem->exec_size) / REG_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1616,13 +1603,13 @@ lower_lsc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
*/
|
||||
if (bld.shader->stage == MESA_SHADER_FRAGMENT && !transpose) {
|
||||
if (include_helpers)
|
||||
emit_predicate_on_vector_mask(bld, inst);
|
||||
emit_predicate_on_vector_mask(bld, mem);
|
||||
else if (has_side_effects && mode != MEMORY_MODE_SCRATCH)
|
||||
brw_emit_predicate_on_sample_mask(bld, inst);
|
||||
brw_emit_predicate_on_sample_mask(bld, mem);
|
||||
}
|
||||
|
||||
brw_send_inst *send = brw_transform_inst_to_send(bld, inst);
|
||||
inst = NULL;
|
||||
brw_send_inst *send = brw_transform_inst_to_send(bld, mem);
|
||||
mem = NULL;
|
||||
|
||||
switch (mode) {
|
||||
case MEMORY_MODE_UNTYPED:
|
||||
|
|
@ -1704,42 +1691,29 @@ emit_a64_oword_block_header(const brw_builder &bld, const brw_reg &addr)
|
|||
}
|
||||
|
||||
static void
|
||||
lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
||||
lower_hdc_memory_logical_send(const brw_builder &bld, brw_mem_inst *mem)
|
||||
{
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
const brw_compiler *compiler = bld.shader->compiler;
|
||||
|
||||
assert(inst->src[MEMORY_LOGICAL_OPCODE].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_MODE].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_BINDING_TYPE].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_DATA_SIZE].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_FLAGS].file == IMM);
|
||||
assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].file == IMM);
|
||||
|
||||
/* Get the logical send arguments. */
|
||||
const enum lsc_opcode op = (lsc_opcode)inst->src[MEMORY_LOGICAL_OPCODE].ud;
|
||||
const enum memory_logical_mode mode =
|
||||
(enum memory_logical_mode) inst->src[MEMORY_LOGICAL_MODE].ud;
|
||||
enum lsc_addr_surface_type binding_type =
|
||||
(enum lsc_addr_surface_type) inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud;
|
||||
brw_reg binding = inst->src[MEMORY_LOGICAL_BINDING];
|
||||
const brw_reg addr = inst->src[MEMORY_LOGICAL_ADDRESS];
|
||||
const unsigned coord_components =
|
||||
inst->src[MEMORY_LOGICAL_COORD_COMPONENTS].ud;
|
||||
const unsigned alignment = inst->src[MEMORY_LOGICAL_ALIGNMENT].ud;
|
||||
const unsigned components = inst->src[MEMORY_LOGICAL_COMPONENTS].ud;
|
||||
const enum memory_flags flags =
|
||||
(enum memory_flags) inst->src[MEMORY_LOGICAL_FLAGS].ud;
|
||||
const bool block = flags & MEMORY_FLAG_TRANSPOSE;
|
||||
const bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
|
||||
const bool volatile_access = flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||
const brw_reg data0 = inst->src[MEMORY_LOGICAL_DATA0];
|
||||
const brw_reg data1 = inst->src[MEMORY_LOGICAL_DATA1];
|
||||
const bool has_side_effects = inst->has_side_effects();
|
||||
const bool has_dest = inst->dst.file != BAD_FILE && !inst->dst.is_null();
|
||||
assert(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].file == IMM &&
|
||||
inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d == 0);
|
||||
brw_reg binding = mem->src[MEMORY_LOGICAL_BINDING];
|
||||
const brw_reg addr = mem->src[MEMORY_LOGICAL_ADDRESS];
|
||||
const brw_reg data0 = mem->src[MEMORY_LOGICAL_DATA0];
|
||||
const brw_reg data1 = mem->src[MEMORY_LOGICAL_DATA1];
|
||||
|
||||
const enum lsc_opcode op = mem->lsc_op;
|
||||
const enum memory_logical_mode mode = mem->mode;
|
||||
enum lsc_addr_surface_type binding_type = mem->binding_type;
|
||||
const unsigned coord_components = mem->coord_components;
|
||||
const unsigned alignment = mem->alignment;
|
||||
const unsigned components = mem->components;
|
||||
const bool block = mem->flags & MEMORY_FLAG_TRANSPOSE;
|
||||
const bool include_helpers = mem->flags & MEMORY_FLAG_INCLUDE_HELPERS;
|
||||
const bool volatile_access = mem->flags & MEMORY_FLAG_VOLATILE_ACCESS;
|
||||
const bool has_side_effects = mem->has_side_effects();
|
||||
const bool has_dest = mem->dst.file != BAD_FILE && !mem->dst.is_null();
|
||||
assert(mem->address_offset == 0);
|
||||
|
||||
/* Don't predicate scratch writes on the sample mask. Otherwise,
|
||||
* FS helper invocations would load undefined values from scratch memory.
|
||||
|
|
@ -1749,8 +1723,7 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
*/
|
||||
bool allow_sample_mask = has_side_effects && mode != MEMORY_MODE_SCRATCH;
|
||||
|
||||
const enum lsc_data_size data_size =
|
||||
(enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
|
||||
const enum lsc_data_size data_size = mem->data_size;
|
||||
|
||||
/* unpadded data size */
|
||||
const uint32_t data_bit_size =
|
||||
|
|
@ -1801,10 +1774,10 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
*/
|
||||
if (bld.shader->stage == MESA_SHADER_FRAGMENT) {
|
||||
if (include_helpers)
|
||||
emit_predicate_on_vector_mask(bld, inst);
|
||||
emit_predicate_on_vector_mask(bld, mem);
|
||||
else if (allow_sample_mask &&
|
||||
(header.file == BAD_FILE || !surface_access))
|
||||
brw_emit_predicate_on_sample_mask(bld, inst);
|
||||
brw_emit_predicate_on_sample_mask(bld, mem);
|
||||
}
|
||||
|
||||
brw_reg payload, payload2;
|
||||
|
|
@ -1870,12 +1843,12 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
sfid = BRW_SFID_HDC1;
|
||||
|
||||
if (lsc_opcode_is_atomic(op)) {
|
||||
desc = brw_dp_typed_atomic_desc(devinfo, inst->exec_size, inst->group,
|
||||
desc = brw_dp_typed_atomic_desc(devinfo, mem->exec_size, mem->group,
|
||||
lsc_op_to_legacy_atomic(op),
|
||||
has_dest);
|
||||
} else {
|
||||
desc = brw_dp_typed_surface_rw_desc(devinfo, inst->exec_size,
|
||||
inst->group, components, !has_dest);
|
||||
desc = brw_dp_typed_surface_rw_desc(devinfo, mem->exec_size,
|
||||
mem->group, components, !has_dest);
|
||||
}
|
||||
} else if (mode == MEMORY_MODE_CONSTANT) {
|
||||
assert(block); /* non-block loads not yet handled */
|
||||
|
|
@ -1891,11 +1864,11 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
if (lsc_opcode_is_atomic(op)) {
|
||||
unsigned aop = lsc_op_to_legacy_atomic(op);
|
||||
if (lsc_opcode_is_atomic_float(op)) {
|
||||
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
|
||||
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, mem->exec_size,
|
||||
data_bit_size, aop,
|
||||
has_dest);
|
||||
} else {
|
||||
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size,
|
||||
desc = brw_dp_a64_untyped_atomic_desc(devinfo, mem->exec_size,
|
||||
data_bit_size, aop,
|
||||
has_dest);
|
||||
}
|
||||
|
|
@ -1903,10 +1876,10 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
desc = brw_dp_a64_oword_block_rw_desc(devinfo, oword_aligned,
|
||||
components, !has_dest);
|
||||
} else if (byte_scattered) {
|
||||
desc = brw_dp_a64_byte_scattered_rw_desc(devinfo, inst->exec_size,
|
||||
desc = brw_dp_a64_byte_scattered_rw_desc(devinfo, mem->exec_size,
|
||||
data_bit_size, !has_dest);
|
||||
} else {
|
||||
desc = brw_dp_a64_untyped_surface_rw_desc(devinfo, inst->exec_size,
|
||||
desc = brw_dp_a64_untyped_surface_rw_desc(devinfo, mem->exec_size,
|
||||
components, !has_dest);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -1917,31 +1890,31 @@ lower_hdc_memory_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
if (lsc_opcode_is_atomic(op)) {
|
||||
unsigned aop = lsc_op_to_legacy_atomic(op);
|
||||
if (lsc_opcode_is_atomic_float(op)) {
|
||||
desc = brw_dp_untyped_atomic_float_desc(devinfo, inst->exec_size,
|
||||
desc = brw_dp_untyped_atomic_float_desc(devinfo, mem->exec_size,
|
||||
aop, has_dest);
|
||||
} else {
|
||||
desc = brw_dp_untyped_atomic_desc(devinfo, inst->exec_size,
|
||||
desc = brw_dp_untyped_atomic_desc(devinfo, mem->exec_size,
|
||||
aop, has_dest);
|
||||
}
|
||||
} else if (block) {
|
||||
desc = brw_dp_oword_block_rw_desc(devinfo, oword_aligned,
|
||||
components, !has_dest);
|
||||
} else if (byte_scattered) {
|
||||
desc = brw_dp_byte_scattered_rw_desc(devinfo, inst->exec_size,
|
||||
desc = brw_dp_byte_scattered_rw_desc(devinfo, mem->exec_size,
|
||||
data_bit_size, !has_dest);
|
||||
} else if (dword_scattered) {
|
||||
desc = brw_dp_dword_scattered_rw_desc(devinfo, inst->exec_size,
|
||||
desc = brw_dp_dword_scattered_rw_desc(devinfo, mem->exec_size,
|
||||
!has_dest);
|
||||
} else {
|
||||
desc = brw_dp_untyped_surface_rw_desc(devinfo, inst->exec_size,
|
||||
desc = brw_dp_untyped_surface_rw_desc(devinfo, mem->exec_size,
|
||||
components, !has_dest);
|
||||
}
|
||||
}
|
||||
|
||||
assert(sfid);
|
||||
|
||||
brw_send_inst *send = brw_transform_inst_to_send(bld, inst);
|
||||
inst = NULL;
|
||||
brw_send_inst *send = brw_transform_inst_to_send(bld, mem);
|
||||
mem = NULL;
|
||||
|
||||
send->sfid = sfid;
|
||||
send->mlen = mlen;
|
||||
|
|
@ -2682,14 +2655,15 @@ brw_lower_logical_sends(brw_shader &s)
|
|||
|
||||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: {
|
||||
brw_mem_inst *mem = inst->as_mem();
|
||||
if (devinfo->ver >= 20 ||
|
||||
(devinfo->has_lsc &&
|
||||
inst->src[MEMORY_LOGICAL_MODE].ud != MEMORY_MODE_TYPED))
|
||||
lower_lsc_memory_logical_send(ibld, inst);
|
||||
(devinfo->has_lsc && mem->mode != MEMORY_MODE_TYPED))
|
||||
lower_lsc_memory_logical_send(ibld, mem);
|
||||
else
|
||||
lower_hdc_memory_logical_send(ibld, inst);
|
||||
lower_hdc_memory_logical_send(ibld, mem);
|
||||
break;
|
||||
}
|
||||
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
|
||||
if (devinfo->has_lsc && !s.compiler->indirect_ubos_use_sampler)
|
||||
|
|
|
|||
|
|
@ -376,21 +376,23 @@ brw_get_lowered_simd_width(const brw_shader *shader, const brw_inst *inst)
|
|||
|
||||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
|
||||
if (devinfo->ver >= 20)
|
||||
return inst->exec_size;
|
||||
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL: {
|
||||
const brw_mem_inst *mem = inst->as_mem();
|
||||
|
||||
if (inst->src[MEMORY_LOGICAL_MODE].ud == MEMORY_MODE_TYPED)
|
||||
if (devinfo->ver >= 20)
|
||||
return mem->exec_size;
|
||||
|
||||
if (mem->mode == MEMORY_MODE_TYPED)
|
||||
return 8;
|
||||
|
||||
/* HDC A64 atomics are limited to SIMD8 */
|
||||
if (!devinfo->has_lsc &&
|
||||
inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT
|
||||
&& lsc_opcode_is_atomic((enum lsc_opcode)
|
||||
inst->src[MEMORY_LOGICAL_OPCODE].ud))
|
||||
mem->binding_type == LSC_ADDR_SURFTYPE_FLAT &&
|
||||
lsc_opcode_is_atomic(mem->lsc_op))
|
||||
return 8;
|
||||
|
||||
return MIN2(16, inst->exec_size);
|
||||
return MIN2(16, mem->exec_size);
|
||||
}
|
||||
|
||||
/* On gfx12 parameters are fixed to 16-bit values and therefore they all
|
||||
* always fit regardless of the execution size.
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ is_expression(const brw_shader *v, const brw_inst *const inst)
|
|||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||
return true;
|
||||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
return inst->src[MEMORY_LOGICAL_MODE].ud == MEMORY_MODE_CONSTANT;
|
||||
return inst->as_mem()->mode == MEMORY_MODE_CONSTANT;
|
||||
case SHADER_OPCODE_LOAD_PAYLOAD:
|
||||
return !is_coalescing_payload(*v, inst);
|
||||
case SHADER_OPCODE_SEND:
|
||||
|
|
@ -259,6 +259,20 @@ tex_inst_match(brw_tex_inst *a, brw_tex_inst *b)
|
|||
a->residency == b->residency;
|
||||
}
|
||||
|
||||
/**
 * Compare the brw_mem_inst-specific fields of two memory instructions.
 *
 * Returns true only when lsc_op, mode, binding_type, data_size,
 * coord_components, components, flags, alignment and address_offset are
 * all equal in \p a and \p b.  Sources, destination, exec_size and the
 * other generic brw_inst state are not examined here;
 * instructions_match() checks those itself and only calls this helper
 * when the instruction kind is BRW_KIND_MEM.
 *
 * The BRW_KIND_MEM case of hash_inst() hashes the same set of fields, so
 * a field added to one should be added to the other.
 */
static bool
|
||||
mem_inst_match(brw_mem_inst *a, brw_mem_inst *b)
|
||||
{
|
||||
return a->lsc_op == b->lsc_op &&
|
||||
a->mode == b->mode &&
|
||||
a->binding_type == b->binding_type &&
|
||||
a->data_size == b->data_size &&
|
||||
a->coord_components == b->coord_components &&
|
||||
a->components == b->components &&
|
||||
a->flags == b->flags &&
|
||||
a->alignment == b->alignment &&
|
||||
a->address_offset == b->address_offset;
|
||||
}
|
||||
|
||||
static bool
|
||||
instructions_match(brw_inst *a, brw_inst *b, bool *negate)
|
||||
{
|
||||
|
|
@ -267,6 +281,7 @@ instructions_match(brw_inst *a, brw_inst *b, bool *negate)
|
|||
/* `kind` is derived from opcode, so skipped. */
|
||||
(a->kind != BRW_KIND_SEND || send_inst_match(a->as_send(), b->as_send())) &&
|
||||
(a->kind != BRW_KIND_TEX || tex_inst_match(a->as_tex(), b->as_tex())) &&
|
||||
(a->kind != BRW_KIND_MEM || mem_inst_match(a->as_mem(), b->as_mem())) &&
|
||||
a->exec_size == b->exec_size &&
|
||||
a->group == b->group &&
|
||||
a->predicate == b->predicate &&
|
||||
|
|
@ -362,6 +377,26 @@ hash_inst(const void *v)
|
|||
break;
|
||||
}
|
||||
|
||||
case BRW_KIND_MEM: {
|
||||
const brw_mem_inst *mem = inst->as_mem();
|
||||
const uint8_t mem_u8data[] = {
|
||||
mem->lsc_op,
|
||||
mem->mode,
|
||||
mem->binding_type,
|
||||
mem->data_size,
|
||||
mem->coord_components,
|
||||
mem->components,
|
||||
mem->flags,
|
||||
};
|
||||
const uint32_t mem_u32data[] = {
|
||||
(uint32_t)mem->address_offset,
|
||||
mem->alignment,
|
||||
};
|
||||
hash = HASH(hash, mem_u8data);
|
||||
hash = HASH(hash, mem_u32data);
|
||||
break;
|
||||
}
|
||||
|
||||
case BRW_KIND_BASE:
|
||||
/* Nothing else to do. */
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -321,61 +321,17 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
|||
static bool
|
||||
print_memory_logical_source(FILE *file, const brw_inst *inst, unsigned i)
|
||||
{
|
||||
if (inst->is_control_source(i)) {
|
||||
assert(inst->src[i].file == IMM &&
|
||||
(inst->src[i].type == BRW_TYPE_UD ||
|
||||
inst->src[i].type == BRW_TYPE_D));
|
||||
assert(!inst->src[i].negate);
|
||||
assert(!inst->src[i].abs);
|
||||
}
|
||||
|
||||
switch (i) {
|
||||
case MEMORY_LOGICAL_OPCODE:
|
||||
fprintf(file, " %s", brw_lsc_op_to_string(inst->src[i].ud));
|
||||
return true;
|
||||
case MEMORY_LOGICAL_MODE: {
|
||||
static const char *modes[] = {
|
||||
[MEMORY_MODE_TYPED] = "typed",
|
||||
[MEMORY_MODE_UNTYPED] = "untyped",
|
||||
[MEMORY_MODE_SHARED_LOCAL] = "shared",
|
||||
[MEMORY_MODE_SCRATCH] = "scratch",
|
||||
[MEMORY_MODE_CONSTANT] = "const",
|
||||
};
|
||||
assert(inst->src[i].ud < ARRAY_SIZE(modes));
|
||||
fprintf(file, " %s", modes[inst->src[i].ud]);
|
||||
return true;
|
||||
}
|
||||
case MEMORY_LOGICAL_BINDING_TYPE:
|
||||
fprintf(file, " %s", brw_lsc_addr_surftype_to_string(inst->src[i].ud));
|
||||
if (inst->src[i].ud != LSC_ADDR_SURFTYPE_FLAT)
|
||||
case MEMORY_LOGICAL_BINDING: {
|
||||
lsc_addr_surface_type binding_type = inst->as_mem()->binding_type;
|
||||
fprintf(file, " %s", brw_lsc_addr_surftype_to_string(binding_type));
|
||||
if (binding_type != LSC_ADDR_SURFTYPE_FLAT)
|
||||
fprintf(file, ":");
|
||||
return true;
|
||||
case MEMORY_LOGICAL_BINDING:
|
||||
return inst->src[i].file == BAD_FILE;
|
||||
}
|
||||
case MEMORY_LOGICAL_ADDRESS:
|
||||
fprintf(file, " addr: ");
|
||||
return false;
|
||||
case MEMORY_LOGICAL_ADDRESS_OFFSET:
|
||||
fprintf(file, " offset: ");
|
||||
return false;
|
||||
case MEMORY_LOGICAL_COORD_COMPONENTS:
|
||||
fprintf(file, " coord_comps:");
|
||||
return false;
|
||||
case MEMORY_LOGICAL_ALIGNMENT:
|
||||
fprintf(file, " align:");
|
||||
return false;
|
||||
case MEMORY_LOGICAL_DATA_SIZE:
|
||||
fprintf(file, " %s", brw_lsc_data_size_to_string(inst->src[i].ud));
|
||||
return true;
|
||||
case MEMORY_LOGICAL_COMPONENTS:
|
||||
fprintf(file, " comps:");
|
||||
return false;
|
||||
case MEMORY_LOGICAL_FLAGS:
|
||||
if (inst->src[i].ud & MEMORY_FLAG_TRANSPOSE)
|
||||
fprintf(file, " transpose");
|
||||
if (inst->src[i].ud & MEMORY_FLAG_INCLUDE_HELPERS)
|
||||
fprintf(file, " helpers");
|
||||
return true;
|
||||
case MEMORY_LOGICAL_DATA0:
|
||||
fprintf(file, " data0: ");
|
||||
return false;
|
||||
|
|
@ -499,10 +455,37 @@ brw_print_instruction(const brw_shader &s, const brw_inst *inst, FILE *file, con
|
|||
fprintf(file, ":%s", brw_reg_type_to_letters(inst->dst.type));
|
||||
}
|
||||
|
||||
const brw_mem_inst *mem = inst->as_mem();
|
||||
if (mem) {
|
||||
fprintf(file, " %s", brw_lsc_op_to_string(mem->lsc_op));
|
||||
|
||||
static const char *modes[] = {
|
||||
[MEMORY_MODE_TYPED] = "typed",
|
||||
[MEMORY_MODE_UNTYPED] = "untyped",
|
||||
[MEMORY_MODE_SHARED_LOCAL] = "shared",
|
||||
[MEMORY_MODE_SCRATCH] = "scratch",
|
||||
[MEMORY_MODE_CONSTANT] = "const",
|
||||
};
|
||||
assert(mem->mode < ARRAY_SIZE(modes));
|
||||
fprintf(file, " %s", modes[mem->mode]);
|
||||
fprintf(file, " offset: %dd", mem->address_offset);
|
||||
fprintf(file, " coord_comps: %uu", mem->coord_components);
|
||||
fprintf(file, " %s", brw_lsc_data_size_to_string(mem->data_size));
|
||||
fprintf(file, " comps: %uu", mem->components);
|
||||
fprintf(file, " align: %uu", mem->alignment);
|
||||
|
||||
if (mem->flags & MEMORY_FLAG_TRANSPOSE)
|
||||
fprintf(file, " transpose");
|
||||
if (mem->flags & MEMORY_FLAG_INCLUDE_HELPERS)
|
||||
fprintf(file, " helpers");
|
||||
if (mem->flags & MEMORY_FLAG_VOLATILE_ACCESS)
|
||||
fprintf(file, " volatile");
|
||||
if (mem->flags & MEMORY_FLAG_COHERENT_ACCESS)
|
||||
fprintf(file, " coherent");
|
||||
}
|
||||
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->opcode == SHADER_OPCODE_MEMORY_LOAD_LOGICAL ||
|
||||
inst->opcode == SHADER_OPCODE_MEMORY_STORE_LOGICAL ||
|
||||
inst->opcode == SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL) {
|
||||
if (mem) {
|
||||
if (print_memory_logical_source(file, inst, i))
|
||||
continue;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -90,36 +90,18 @@ is_ud_imm(const brw_reg &reg)
|
|||
return reg.file == IMM && reg.type == BRW_TYPE_UD;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_d_imm(const brw_reg &reg)
|
||||
{
|
||||
return reg.file == IMM && reg.type == BRW_TYPE_D;
|
||||
}
|
||||
|
||||
static void
|
||||
validate_memory_logical(const brw_shader &s, const brw_inst *inst)
|
||||
validate_memory_logical(const brw_shader &s, const brw_mem_inst *inst)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
|
||||
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_OPCODE]));
|
||||
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_MODE]));
|
||||
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_BINDING_TYPE]));
|
||||
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS]));
|
||||
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_ALIGNMENT]));
|
||||
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_DATA_SIZE]));
|
||||
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_COMPONENTS]));
|
||||
VAL_ASSERT(is_ud_imm(inst->src[MEMORY_LOGICAL_FLAGS]));
|
||||
VAL_ASSERT(is_d_imm(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET]));
|
||||
|
||||
enum lsc_opcode op = (enum lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud;
|
||||
enum memory_flags flags = (memory_flags)inst->src[MEMORY_LOGICAL_FLAGS].ud;
|
||||
enum lsc_opcode op = inst->lsc_op;
|
||||
enum memory_flags flags = (memory_flags)inst->flags;
|
||||
bool transpose = flags & MEMORY_FLAG_TRANSPOSE;
|
||||
bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;
|
||||
enum memory_logical_mode mode =
|
||||
(memory_logical_mode)inst->src[MEMORY_LOGICAL_MODE].ud;
|
||||
enum memory_logical_mode mode = inst->mode;
|
||||
|
||||
enum lsc_data_size data_size =
|
||||
(enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
|
||||
enum lsc_data_size data_size = inst->data_size;
|
||||
unsigned data_size_B = lsc_data_size_bytes(data_size);
|
||||
|
||||
if (!devinfo->has_lsc) {
|
||||
|
|
@ -131,18 +113,18 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst)
|
|||
if (transpose) {
|
||||
const unsigned min_alignment =
|
||||
mode == MEMORY_MODE_SHARED_LOCAL ? 16 : 4;
|
||||
VAL_ASSERT_GE(inst->src[MEMORY_LOGICAL_ALIGNMENT].ud, min_alignment);
|
||||
VAL_ASSERT_GE(inst->alignment, min_alignment);
|
||||
}
|
||||
}
|
||||
|
||||
VAL_ASSERT(!transpose || !include_helpers);
|
||||
VAL_ASSERT(!transpose || lsc_opcode_has_transpose(op));
|
||||
|
||||
if (inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT)
|
||||
if (inst->binding_type == LSC_ADDR_SURFTYPE_FLAT)
|
||||
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_BINDING].file, BAD_FILE);
|
||||
|
||||
if (inst->src[MEMORY_LOGICAL_DATA1].file != BAD_FILE) {
|
||||
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud,
|
||||
VAL_ASSERT_EQ(inst->components,
|
||||
inst->components_read(MEMORY_LOGICAL_DATA1));
|
||||
|
||||
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_DATA0].type,
|
||||
|
|
@ -150,7 +132,7 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst)
|
|||
}
|
||||
|
||||
if (inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE) {
|
||||
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud,
|
||||
VAL_ASSERT_EQ(inst->components,
|
||||
inst->components_read(MEMORY_LOGICAL_DATA0));
|
||||
|
||||
VAL_ASSERT_EQ(brw_type_size_bytes(inst->src[MEMORY_LOGICAL_DATA0].type),
|
||||
|
|
@ -162,10 +144,10 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst)
|
|||
|
||||
/** TGM messages cannot have a base offset */
|
||||
if (mode == MEMORY_MODE_TYPED)
|
||||
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d, 0);
|
||||
VAL_ASSERT_EQ(inst->as_mem()->address_offset, 0);
|
||||
|
||||
/* Offset must be DWord aligned */
|
||||
VAL_ASSERT_EQ((inst->src[MEMORY_LOGICAL_ADDRESS_OFFSET].d % 4), 0);
|
||||
VAL_ASSERT_EQ((inst->as_mem()->address_offset % 4), 0);
|
||||
|
||||
switch (inst->opcode) {
|
||||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
|
|
@ -185,7 +167,7 @@ validate_memory_logical(const brw_shader &s, const brw_inst *inst)
|
|||
(lsc_op_num_data_values(op) < 1));
|
||||
VAL_ASSERT_EQ((inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE),
|
||||
(lsc_op_num_data_values(op) < 2));
|
||||
VAL_ASSERT_EQ(inst->src[MEMORY_LOGICAL_COMPONENTS].ud, 1);
|
||||
VAL_ASSERT_EQ(inst->components, 1);
|
||||
VAL_ASSERT(!include_helpers);
|
||||
break;
|
||||
default:
|
||||
|
|
@ -336,7 +318,7 @@ brw_validate(const brw_shader &s)
|
|||
case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
|
||||
case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
|
||||
validate_memory_logical(s, inst);
|
||||
validate_memory_logical(s, inst->as_mem());
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_MEMORY_FENCE:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue