mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 04:48:08 +02:00
pan/mdg: improve ldst opcode names and add missing ops
Signed-off-by: Italo Nicola <italonicola@collabora.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9461>
This commit is contained in:
parent
a567f60b3e
commit
d2119073de
9 changed files with 343 additions and 169 deletions
|
|
@ -570,7 +570,7 @@ v_load_store_scratch(
|
|||
.dest = ~0,
|
||||
.src = { ~0, ~0, ~0, ~0 },
|
||||
.swizzle = SWIZZLE_IDENTITY_4,
|
||||
.op = is_store ? midgard_op_st_u128 : midgard_op_ld_u128,
|
||||
.op = is_store ? midgard_op_st_128 : midgard_op_ld_128,
|
||||
.load_store = {
|
||||
/* For register spilling - to thread local storage */
|
||||
.arg_1 = 0xEA,
|
||||
|
|
|
|||
|
|
@ -33,8 +33,9 @@
|
|||
)
|
||||
|
||||
#define OP_IS_PROJECTION(op) ( \
|
||||
op == midgard_op_ldst_perspective_division_z || \
|
||||
op == midgard_op_ldst_perspective_division_w \
|
||||
op == midgard_op_ldst_perspective_div_y || \
|
||||
op == midgard_op_ldst_perspective_div_z || \
|
||||
op == midgard_op_ldst_perspective_div_w \
|
||||
)
|
||||
|
||||
#define OP_IS_VEC4_ONLY(op) ( \
|
||||
|
|
@ -43,16 +44,13 @@
|
|||
)
|
||||
|
||||
#define OP_IS_MOVE(op) ( \
|
||||
op == midgard_alu_op_fmov || \
|
||||
(op >= midgard_alu_op_fmov && op <= midgard_alu_op_fmov_rtp) || \
|
||||
op == midgard_alu_op_imov \
|
||||
)
|
||||
|
||||
#define OP_IS_UBO_READ(op) ( \
|
||||
op == midgard_op_ld_ubo_u8 || \
|
||||
op == midgard_op_ld_ubo_u16 || \
|
||||
op == midgard_op_ld_ubo_u32 || \
|
||||
op == midgard_op_ld_ubo_u64 || \
|
||||
op == midgard_op_ld_ubo_u128 \
|
||||
op >= midgard_op_ld_ubo_u8 && \
|
||||
op <= midgard_op_ld_ubo_128_bswap8 \
|
||||
)
|
||||
|
||||
#define OP_IS_CSEL_V(op) ( \
|
||||
|
|
@ -81,7 +79,32 @@
|
|||
|
||||
#define OP_IS_COMMON_STORE(op) ( \
|
||||
op >= midgard_op_st_u8 && \
|
||||
op <= midgard_op_st_u128 \
|
||||
op <= midgard_op_st_128_bswap8 \
|
||||
)
|
||||
|
||||
/* True if `op` is an image load opcode, an image store opcode, or lea_image.
 * The load and store families are matched as inclusive enum ranges
 * (…_32f through …_32i), so this depends on those enumerators staying
 * numerically contiguous. `op` is parenthesised so that compound argument
 * expressions expand correctly; it is still evaluated more than once, so do
 * not pass an expression with side effects. */
#define OP_IS_IMAGE(op) ( \
        ((op) >= midgard_op_ld_image_32f && (op) <= midgard_op_ld_image_32i) || \
        ((op) >= midgard_op_st_image_32f && (op) <= midgard_op_st_image_32i) || \
        (op) == midgard_op_lea_image \
)
|
||||
|
||||
/* True if `op` falls in the ld_special or st_special opcode family.
 * Each family is matched as an inclusive enum range (…_32f through …_32i),
 * so this depends on those enumerators staying numerically contiguous.
 * `op` is parenthesised so that compound argument expressions expand
 * correctly; it is evaluated more than once. */
#define OP_IS_SPECIAL(op) ( \
        ((op) >= midgard_op_ld_special_32f && (op) <= midgard_op_ld_special_32i) || \
        ((op) >= midgard_op_st_special_32f && (op) <= midgard_op_st_special_32i) \
)
|
||||
|
||||
/* True if `op` is one of the pack_colour opcodes, matched as the inclusive
 * enum range pack_colour_f32 through pack_colour_s32. `op` is parenthesised
 * so that compound argument expressions expand correctly; it is evaluated
 * twice. */
#define OP_IS_PACK_COLOUR(op) ( \
        ((op) >= midgard_op_pack_colour_f32 && (op) <= midgard_op_pack_colour_s32) \
)
|
||||
|
||||
/* True if `op` is one of the unpack_colour opcodes, matched as the inclusive
 * enum range unpack_colour_f32 through unpack_colour_s32. `op` is
 * parenthesised so that compound argument expressions expand correctly; it
 * is evaluated twice. */
#define OP_IS_UNPACK_COLOUR(op) ( \
        ((op) >= midgard_op_unpack_colour_f32 && (op) <= midgard_op_unpack_colour_s32) \
)
|
||||
|
||||
/* Instructions that are on the load/store unit but don't access memory.
 * Matched as the inclusive enum range unpack_colour_f32 through
 * ldst_perspective_div_w, so every opcode numbered between those two
 * enumerators is included. `op` is parenthesised so that compound argument
 * expressions expand correctly; it is evaluated twice. */
#define OP_IS_REG2REG_LDST(op) ( \
        (op) >= midgard_op_unpack_colour_f32 && \
        (op) <= midgard_op_ldst_perspective_div_w \
)
|
||||
|
||||
/* ALU control words are single bit fields with a lot of space */
|
||||
|
|
|
|||
|
|
@ -458,17 +458,23 @@ midgard_writeout;
|
|||
typedef enum {
|
||||
midgard_op_ld_st_noop = 0x03,
|
||||
|
||||
/* Unpack a colour from a native format to fp16 */
|
||||
midgard_op_unpack_colour = 0x05,
|
||||
/* Unpacks a colour from a native format to <format> */
|
||||
midgard_op_unpack_colour_f32 = 0x04,
|
||||
midgard_op_unpack_colour_f16 = 0x05,
|
||||
midgard_op_unpack_colour_u32 = 0x06,
|
||||
midgard_op_unpack_colour_s32 = 0x07,
|
||||
|
||||
/* Packs a colour from fp16 to a native format */
|
||||
midgard_op_pack_colour = 0x09,
|
||||
/* Packs a colour from <format> to a native format */
|
||||
midgard_op_pack_colour_f32 = 0x08,
|
||||
midgard_op_pack_colour_f16 = 0x09,
|
||||
midgard_op_pack_colour_u32 = 0x0A,
|
||||
midgard_op_pack_colour_s32 = 0x0B,
|
||||
|
||||
/* Likewise packs from fp32 */
|
||||
midgard_op_pack_colour_32 = 0x0A,
|
||||
/* Computes the effective address of a mem address expression */
|
||||
midgard_op_lea = 0x0C,
|
||||
|
||||
/* Converts image/tex coordinates into mem address */
|
||||
midgard_op_lea_tex = 0x0D,
|
||||
/* Converts image coordinates into mem address */
|
||||
midgard_op_lea_image = 0x0D,
|
||||
|
||||
/* Unclear why this is on the L/S unit, but moves fp32 cube map
|
||||
* coordinates in r27 to its cube map texture coordinate destination
|
||||
|
|
@ -476,52 +482,83 @@ typedef enum {
|
|||
|
||||
midgard_op_ld_cubemap_coords = 0x0E,
|
||||
|
||||
/* Loads a global/local/group ID, depending on arguments */
|
||||
midgard_op_ld_compute_id = 0x10,
|
||||
/* A mov between registers that the ldst pipeline can access */
|
||||
midgard_op_ldst_mov = 0x10,
|
||||
|
||||
/* The L/S unit can do perspective division a clock faster than the ALU
|
||||
* if you're lucky. Put the vec4 in r27, and call with 0x24 as the
|
||||
* unknown state; the output will be <x/w, y/w, z/w, 1>. Replace w with
|
||||
* z for the z version */
|
||||
midgard_op_ldst_perspective_division_z = 0x12,
|
||||
midgard_op_ldst_perspective_division_w = 0x13,
|
||||
midgard_op_ldst_perspective_div_y = 0x11,
|
||||
midgard_op_ldst_perspective_div_z = 0x12,
|
||||
midgard_op_ldst_perspective_div_w = 0x13,
|
||||
|
||||
/* val in r27.y, address embedded, outputs result to argument. Invert val for sub. Let val = +-1 for inc/dec. */
|
||||
midgard_op_atomic_add = 0x40,
|
||||
midgard_op_atomic_add64 = 0x41,
|
||||
midgard_op_atomic_add_be = 0x42,
|
||||
midgard_op_atomic_add64_be = 0x43,
|
||||
|
||||
midgard_op_atomic_and = 0x44,
|
||||
midgard_op_atomic_and64 = 0x45,
|
||||
midgard_op_atomic_and_be = 0x46,
|
||||
midgard_op_atomic_and64_be = 0x47,
|
||||
midgard_op_atomic_or = 0x48,
|
||||
midgard_op_atomic_or64 = 0x49,
|
||||
midgard_op_atomic_or_be = 0x4A,
|
||||
midgard_op_atomic_or64_be = 0x4B,
|
||||
midgard_op_atomic_xor = 0x4C,
|
||||
midgard_op_atomic_xor64 = 0x4D,
|
||||
midgard_op_atomic_xor_be = 0x4E,
|
||||
midgard_op_atomic_xor64_be = 0x4F,
|
||||
|
||||
midgard_op_atomic_imin = 0x50,
|
||||
midgard_op_atomic_imin64 = 0x51,
|
||||
midgard_op_atomic_imin_be = 0x52,
|
||||
midgard_op_atomic_imin64_be = 0x53,
|
||||
midgard_op_atomic_umin = 0x54,
|
||||
midgard_op_atomic_umin64 = 0x55,
|
||||
midgard_op_atomic_umin_be = 0x56,
|
||||
midgard_op_atomic_umin64_be = 0x57,
|
||||
midgard_op_atomic_imax = 0x58,
|
||||
midgard_op_atomic_imax64 = 0x59,
|
||||
midgard_op_atomic_imax_be = 0x5A,
|
||||
midgard_op_atomic_imax64_be = 0x5B,
|
||||
midgard_op_atomic_umax = 0x5C,
|
||||
midgard_op_atomic_umax64 = 0x5D,
|
||||
midgard_op_atomic_umax_be = 0x5E,
|
||||
midgard_op_atomic_umax64_be = 0x5F,
|
||||
|
||||
midgard_op_atomic_xchg = 0x60,
|
||||
midgard_op_atomic_xchg64 = 0x61,
|
||||
midgard_op_atomic_xchg_be = 0x62,
|
||||
midgard_op_atomic_xchg64_be = 0x63,
|
||||
|
||||
midgard_op_atomic_cmpxchg = 0x64,
|
||||
midgard_op_atomic_cmpxchg64 = 0x65,
|
||||
midgard_op_atomic_cmpxchg_be = 0x66,
|
||||
midgard_op_atomic_cmpxchg64_be = 0x67,
|
||||
|
||||
/* Used for compute shader's __global arguments, __local variables (or
|
||||
* for register spilling) */
|
||||
/* Used for compute shader's __global arguments, __local
|
||||
* variables (or for register spilling) */
|
||||
|
||||
midgard_op_ld_u8 = 0x80, /* zero extends */
|
||||
midgard_op_ld_i8 = 0x81, /* sign extends */
|
||||
midgard_op_ld_u16 = 0x84, /* zero extends */
|
||||
midgard_op_ld_i16 = 0x85, /* sign extends */
|
||||
midgard_op_ld_u32 = 0x88,
|
||||
midgard_op_ld_u64 = 0x8C,
|
||||
midgard_op_ld_u128 = 0x90,
|
||||
midgard_op_ld_u8 = 0x80, /* zero extends */
|
||||
midgard_op_ld_i8 = 0x81, /* sign extends */
|
||||
midgard_op_ld_u16 = 0x84, /* zero extends */
|
||||
midgard_op_ld_i16 = 0x85, /* sign extends */
|
||||
midgard_op_ld_u16_be = 0x86, /* zero extends, big endian */
|
||||
midgard_op_ld_i16_be = 0x87, /* sign extends, big endian */
|
||||
midgard_op_ld_32 = 0x88, /* short2, int, float */
|
||||
midgard_op_ld_32_bswap2 = 0x89, /* 16-bit big endian vector */
|
||||
midgard_op_ld_32_bswap4 = 0x8A, /* 32-bit big endian scalar */
|
||||
midgard_op_ld_64 = 0x8C, /* int2, float2, long */
|
||||
midgard_op_ld_64_bswap2 = 0x8D, /* 16-bit big endian vector */
|
||||
midgard_op_ld_64_bswap4 = 0x8E, /* 32-bit big endian vector */
|
||||
midgard_op_ld_64_bswap8 = 0x8F, /* 64-bit big endian scalar */
|
||||
midgard_op_ld_128 = 0x90, /* float4, long2 */
|
||||
midgard_op_ld_128_bswap2 = 0x91, /* 16-bit big endian vector */
|
||||
midgard_op_ld_128_bswap4 = 0x92, /* 32-bit big endian vector */
|
||||
midgard_op_ld_128_bswap8 = 0x93, /* 64-bit big endian vector */
|
||||
|
||||
midgard_op_ld_attr_32 = 0x94,
|
||||
midgard_op_ld_attr_16 = 0x95,
|
||||
|
|
@ -532,40 +569,67 @@ typedef enum {
|
|||
midgard_op_ld_vary_32u = 0x9A,
|
||||
midgard_op_ld_vary_32i = 0x9B,
|
||||
|
||||
/* Old version of midgard_op_ld_color_buffer_as_fp16, for T720 */
|
||||
midgard_op_ld_color_buffer_as_fp32_old = 0x9C,
|
||||
midgard_op_ld_color_buffer_as_fp16_old = 0x9D,
|
||||
midgard_op_ld_color_buffer_32u_old = 0x9E,
|
||||
/* This instruction behaves differently depending if the gpu is a v4
|
||||
* or a newer gpu. The main difference hinges on which values of the
|
||||
* second argument are valid for each gpu.
|
||||
* TODO: properly document and decode each possible value for the
|
||||
* second argument. */
|
||||
midgard_op_ld_special_32f = 0x9C,
|
||||
midgard_op_ld_special_16f = 0x9D,
|
||||
midgard_op_ld_special_32u = 0x9E,
|
||||
midgard_op_ld_special_32i = 0x9F,
|
||||
|
||||
/* The distinction between these ops is the alignment requirement /
|
||||
* accompanying shift. Thus, the offset to ld_ubo_int4 is in 16-byte
|
||||
* units and can load 128-bit. The offset to ld_ubo_short4 is in 8-byte
|
||||
* units; ld_ubo_char4 in 4-byte units. ld_ubo_char/ld_ubo_char2 are
|
||||
* purely theoretical (never seen in the wild) since int8/int16/fp16
|
||||
* UBOs don't really exist. The ops are still listed to maintain
|
||||
* symmetry with generic I/O ops. */
|
||||
|
||||
midgard_op_ld_ubo_u8 = 0xA0, /* theoretical */
|
||||
midgard_op_ld_ubo_u16 = 0xA4, /* theoretical */
|
||||
midgard_op_ld_ubo_u32 = 0xA8,
|
||||
midgard_op_ld_ubo_u64 = 0xAC,
|
||||
midgard_op_ld_ubo_u128 = 0xB0,
|
||||
/* The distinction between these ops is the alignment
|
||||
* requirement / accompanying shift. Thus, the offset to
|
||||
* ld_ubo_128 is in 16-byte units and can load 128-bit. The
|
||||
* offset to ld_ubo_64 is in 8-byte units; ld_ubo_32 in 4-byte
|
||||
* units. */
|
||||
midgard_op_ld_ubo_u8 = 0xA0, /* theoretical */
|
||||
midgard_op_ld_ubo_i8 = 0xA1, /* theoretical */
|
||||
midgard_op_ld_ubo_u16 = 0xA4, /* theoretical */
|
||||
midgard_op_ld_ubo_i16 = 0xA5, /* theoretical */
|
||||
midgard_op_ld_ubo_u16_be = 0xA6, /* theoretical */
|
||||
midgard_op_ld_ubo_i16_be = 0xA7, /* theoretical */
|
||||
midgard_op_ld_ubo_32 = 0xA8,
|
||||
midgard_op_ld_ubo_32_bswap2 = 0xA9,
|
||||
midgard_op_ld_ubo_32_bswap4 = 0xAA,
|
||||
midgard_op_ld_ubo_64 = 0xAC,
|
||||
midgard_op_ld_ubo_64_bswap2 = 0xAD,
|
||||
midgard_op_ld_ubo_64_bswap4 = 0xAE,
|
||||
midgard_op_ld_ubo_64_bswap8 = 0xAF,
|
||||
midgard_op_ld_ubo_128 = 0xB0,
|
||||
midgard_op_ld_ubo_128_bswap2 = 0xB1,
|
||||
midgard_op_ld_ubo_128_bswap4 = 0xB2,
|
||||
midgard_op_ld_ubo_128_bswap8 = 0xB3,
|
||||
|
||||
midgard_op_ld_image_32f = 0xB4,
|
||||
midgard_op_ld_image_16f = 0xB5,
|
||||
midgard_op_ld_image_32u = 0xB6,
|
||||
midgard_op_ld_image_32i = 0xB7,
|
||||
|
||||
/* New-style blending ops. Works on T760/T860 */
|
||||
midgard_op_ld_color_buffer_as_fp32 = 0xB8,
|
||||
midgard_op_ld_color_buffer_as_fp16 = 0xB9,
|
||||
midgard_op_ld_color_buffer_32u = 0xBA,
|
||||
/* Only works on v5 or newer.
|
||||
* Older cards must use ld_special with tilebuffer selectors. */
|
||||
midgard_op_ld_tilebuffer_32f = 0xB8,
|
||||
midgard_op_ld_tilebuffer_16f = 0xB9,
|
||||
midgard_op_ld_tilebuffer_raw = 0xBA,
|
||||
|
||||
midgard_op_st_u8 = 0xC0,
|
||||
midgard_op_st_u16 = 0xC4,
|
||||
midgard_op_st_u32 = 0xC8,
|
||||
midgard_op_st_u64 = 0xCC,
|
||||
midgard_op_st_u128 = 0xD0,
|
||||
midgard_op_st_u8 = 0xC0, /* zero extends */
|
||||
midgard_op_st_i8 = 0xC1, /* sign extends */
|
||||
midgard_op_st_u16 = 0xC4, /* zero extends */
|
||||
midgard_op_st_i16 = 0xC5, /* sign extends */
|
||||
midgard_op_st_u16_be = 0xC6, /* zero extends, big endian */
|
||||
midgard_op_st_i16_be = 0xC7, /* sign extends, big endian */
|
||||
midgard_op_st_32 = 0xC8, /* short2, int, float */
|
||||
midgard_op_st_32_bswap2 = 0xC9, /* 16-bit big endian vector */
|
||||
midgard_op_st_32_bswap4 = 0xCA, /* 32-bit big endian scalar */
|
||||
midgard_op_st_64 = 0xCC, /* int2, float2, long */
|
||||
midgard_op_st_64_bswap2 = 0xCD, /* 16-bit big endian vector */
|
||||
midgard_op_st_64_bswap4 = 0xCE, /* 32-bit big endian vector */
|
||||
midgard_op_st_64_bswap8 = 0xCF, /* 64-bit big endian scalar */
|
||||
midgard_op_st_128 = 0xD0, /* float4, long2 */
|
||||
midgard_op_st_128_bswap2 = 0xD1, /* 16-bit big endian vector */
|
||||
midgard_op_st_128_bswap4 = 0xD2, /* 32-bit big endian vector */
|
||||
midgard_op_st_128_bswap8 = 0xD3, /* 64-bit big endian vector */
|
||||
|
||||
midgard_op_st_vary_32 = 0xD4,
|
||||
midgard_op_st_vary_16 = 0xD5,
|
||||
|
|
@ -577,6 +641,18 @@ typedef enum {
|
|||
midgard_op_st_image_16f = 0xD9,
|
||||
midgard_op_st_image_32u = 0xDA,
|
||||
midgard_op_st_image_32i = 0xDB,
|
||||
|
||||
midgard_op_st_special_32f = 0xDC,
|
||||
midgard_op_st_special_16f = 0xDD,
|
||||
midgard_op_st_special_32u = 0xDE,
|
||||
midgard_op_st_special_32i = 0xDF,
|
||||
|
||||
/* Only works on v5 or newer.
|
||||
* Older cards must use ld_special with tilebuffer selectors. */
|
||||
midgard_op_st_tilebuffer_32f = 0xE8,
|
||||
midgard_op_st_tilebuffer_16f = 0xE9,
|
||||
midgard_op_st_tilebuffer_raw = 0xEA,
|
||||
midgard_op_trap = 0xFC,
|
||||
} midgard_load_store_op;
|
||||
|
||||
typedef enum {
|
||||
|
|
|
|||
|
|
@ -132,19 +132,19 @@ schedule_barrier(compiler_context *ctx)
|
|||
|
||||
M_LOAD(ld_attr_32, nir_type_uint32);
|
||||
M_LOAD(ld_vary_32, nir_type_uint32);
|
||||
M_LOAD(ld_ubo_u128, nir_type_uint32);
|
||||
M_LOAD(ld_u32, nir_type_uint32);
|
||||
M_LOAD(ld_u64, nir_type_uint32);
|
||||
M_LOAD(ld_u128, nir_type_uint32);
|
||||
M_STORE(st_u32, nir_type_uint32);
|
||||
M_STORE(st_u64, nir_type_uint32);
|
||||
M_STORE(st_u128, nir_type_uint32);
|
||||
M_LOAD(ld_color_buffer_32u, nir_type_uint32);
|
||||
M_LOAD(ld_color_buffer_as_fp16, nir_type_float16);
|
||||
M_LOAD(ld_color_buffer_as_fp32, nir_type_float32);
|
||||
M_LOAD(ld_ubo_128, nir_type_uint32);
|
||||
M_LOAD(ld_32, nir_type_uint32);
|
||||
M_LOAD(ld_64, nir_type_uint32);
|
||||
M_LOAD(ld_128, nir_type_uint32);
|
||||
M_STORE(st_32, nir_type_uint32);
|
||||
M_STORE(st_64, nir_type_uint32);
|
||||
M_STORE(st_128, nir_type_uint32);
|
||||
M_LOAD(ld_tilebuffer_raw, nir_type_uint32);
|
||||
M_LOAD(ld_tilebuffer_16f, nir_type_float16);
|
||||
M_LOAD(ld_tilebuffer_32f, nir_type_float32);
|
||||
M_STORE(st_vary_32, nir_type_uint32);
|
||||
M_LOAD(ld_cubemap_coords, nir_type_uint32);
|
||||
M_LOAD(ld_compute_id, nir_type_uint32);
|
||||
M_LOAD(ldst_mov, nir_type_uint32);
|
||||
M_LOAD(ld_image_32f, nir_type_float32);
|
||||
M_LOAD(ld_image_16f, nir_type_float16);
|
||||
M_LOAD(ld_image_32u, nir_type_uint32);
|
||||
|
|
@ -153,7 +153,7 @@ M_STORE(st_image_32f, nir_type_float32);
|
|||
M_STORE(st_image_16f, nir_type_float16);
|
||||
M_STORE(st_image_32u, nir_type_uint32);
|
||||
M_STORE(st_image_32i, nir_type_int32);
|
||||
M_LOAD(lea_tex, nir_type_uint64);
|
||||
M_LOAD(lea_image, nir_type_uint64);
|
||||
|
||||
#define M_IMAGE(op) \
|
||||
static midgard_instruction \
|
||||
|
|
@ -1155,7 +1155,7 @@ emit_ubo_read(
|
|||
{
|
||||
/* TODO: half-floats */
|
||||
|
||||
midgard_instruction ins = m_ld_ubo_u128(dest, 0);
|
||||
midgard_instruction ins = m_ld_ubo_128(dest, 0);
|
||||
ins.constants.u32[0] = offset;
|
||||
|
||||
if (instr->type == nir_instr_type_intrinsic)
|
||||
|
|
@ -1202,11 +1202,11 @@ emit_global(
|
|||
nir_dest_num_components(intr->dest);
|
||||
|
||||
if (bitsize <= 32)
|
||||
ins = m_ld_u32(srcdest, 0);
|
||||
ins = m_ld_32(srcdest, 0);
|
||||
else if (bitsize <= 64)
|
||||
ins = m_ld_u64(srcdest, 0);
|
||||
ins = m_ld_64(srcdest, 0);
|
||||
else if (bitsize <= 128)
|
||||
ins = m_ld_u128(srcdest, 0);
|
||||
ins = m_ld_128(srcdest, 0);
|
||||
else
|
||||
unreachable("Invalid global read size");
|
||||
} else {
|
||||
|
|
@ -1214,11 +1214,11 @@ emit_global(
|
|||
nir_src_num_components(intr->src[0]);
|
||||
|
||||
if (bitsize <= 32)
|
||||
ins = m_st_u32(srcdest, 0);
|
||||
ins = m_st_32(srcdest, 0);
|
||||
else if (bitsize <= 64)
|
||||
ins = m_st_u64(srcdest, 0);
|
||||
ins = m_st_64(srcdest, 0);
|
||||
else if (bitsize <= 128)
|
||||
ins = m_st_u128(srcdest, 0);
|
||||
ins = m_st_128(srcdest, 0);
|
||||
else
|
||||
unreachable("Invalid global store size");
|
||||
}
|
||||
|
|
@ -1241,7 +1241,7 @@ emit_global(
|
|||
/* If is_shared is off, the only other possible values are globals, since
|
||||
* SSBO's are being lowered to globals through a NIR pass.
|
||||
* `image_direct_address` should be ~0 when instr is not an image_atomic
|
||||
* and the destination register of a lea_tex op when it is an image_atomic. */
|
||||
* and the destination register of a lea_image op when it is an image_atomic. */
|
||||
static void
|
||||
emit_atomic(
|
||||
compiler_context *ctx,
|
||||
|
|
@ -1370,7 +1370,7 @@ emit_varying_read(
|
|||
}
|
||||
|
||||
|
||||
/* If `is_atomic` is true, we emit a `lea_tex` since midgard doesn't not have special
|
||||
/* If `is_atomic` is true, we emit a `lea_image` since midgard does not have special
|
||||
* image_atomic opcodes. The caller can then use that address to emit a normal atomic opcode. */
|
||||
static midgard_instruction
|
||||
emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
|
||||
|
|
@ -1405,9 +1405,9 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic)
|
|||
ins = st_image(type, val, address);
|
||||
nir_alu_type base_type = nir_alu_type_get_base_type(type);
|
||||
ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]);
|
||||
} else if (is_atomic) { /* emit lea_tex */
|
||||
} else if (is_atomic) { /* emit lea_image */
|
||||
unsigned dest = make_compiler_temp_reg(ctx);
|
||||
ins = m_lea_tex(dest, address);
|
||||
ins = m_lea_image(dest, address);
|
||||
ins.mask = mask_of(2); /* 64-bit memory address */
|
||||
} else { /* emit ld_image_* */
|
||||
nir_alu_type type = nir_intrinsic_dest_type(instr);
|
||||
|
|
@ -1564,7 +1564,7 @@ static void
|
|||
emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr)
|
||||
{
|
||||
unsigned reg = nir_dest_index(&instr->dest);
|
||||
midgard_instruction ins = m_ld_compute_id(reg, 0);
|
||||
midgard_instruction ins = m_ldst_mov(reg, 0);
|
||||
ins.mask = mask_of(3);
|
||||
ins.swizzle[0][3] = COMPONENT_X; /* xyzx */
|
||||
ins.load_store.arg_1 = compute_builtin_arg(instr->intrinsic);
|
||||
|
|
@ -1596,8 +1596,8 @@ emit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx)
|
|||
{
|
||||
unsigned reg = nir_dest_index(&instr->dest);
|
||||
|
||||
midgard_instruction ld = m_ld_color_buffer_32u(reg, 0);
|
||||
ld.op = midgard_op_ld_color_buffer_32u_old;
|
||||
midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0);
|
||||
ld.op = midgard_op_ld_special_32u;
|
||||
ld.load_store.address = idx;
|
||||
ld.load_store.arg_2 = 0x1E;
|
||||
|
||||
|
|
@ -1788,7 +1788,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
/* T720 and below use different blend opcodes with slightly
|
||||
* different semantics than T760 and up */
|
||||
|
||||
midgard_instruction ld = m_ld_color_buffer_32u(reg, 0);
|
||||
midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0);
|
||||
|
||||
ld.load_store.arg_2 = output_load_rt_addr(ctx, instr);
|
||||
|
||||
|
|
@ -1801,7 +1801,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
}
|
||||
|
||||
if (ctx->quirks & MIDGARD_OLD_BLEND) {
|
||||
ld.op = midgard_op_ld_color_buffer_32u_old;
|
||||
ld.op = midgard_op_ld_special_32u;
|
||||
ld.load_store.address = 16;
|
||||
ld.load_store.arg_2 = 0x1E;
|
||||
}
|
||||
|
|
@ -1817,9 +1817,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
|
||||
midgard_instruction ld;
|
||||
if (bits == 16)
|
||||
ld = m_ld_color_buffer_as_fp16(reg, 0);
|
||||
ld = m_ld_tilebuffer_16f(reg, 0);
|
||||
else
|
||||
ld = m_ld_color_buffer_as_fp32(reg, 0);
|
||||
ld = m_ld_tilebuffer_32f(reg, 0);
|
||||
|
||||
ld.load_store.arg_2 = output_load_rt_addr(ctx, instr);
|
||||
|
||||
|
|
@ -1828,9 +1828,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
|
|||
|
||||
if (ctx->quirks & MIDGARD_OLD_BLEND) {
|
||||
if (bits == 16)
|
||||
ld.op = midgard_op_ld_color_buffer_as_fp16_old;
|
||||
ld.op = midgard_op_ld_special_16f;
|
||||
else
|
||||
ld.op = midgard_op_ld_color_buffer_as_fp32_old;
|
||||
ld.op = midgard_op_ld_special_32f;
|
||||
ld.load_store.address = 1;
|
||||
ld.load_store.arg_2 = 0x1E;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -436,13 +436,13 @@ midgard_pack_common_store_mask(midgard_instruction *ins) {
|
|||
}
|
||||
}
|
||||
break;
|
||||
case midgard_op_st_u32:
|
||||
case midgard_op_st_u64:
|
||||
case midgard_op_st_u128: {
|
||||
case midgard_op_st_32:
|
||||
case midgard_op_st_64:
|
||||
case midgard_op_st_128: {
|
||||
unsigned total_sz = 32;
|
||||
if (ins->op == midgard_op_st_u128)
|
||||
if (ins->op == midgard_op_st_128)
|
||||
total_sz = 128;
|
||||
else if (ins->op == midgard_op_st_u64)
|
||||
else if (ins->op == midgard_op_st_64)
|
||||
total_sz = 64;
|
||||
|
||||
nr_comp = total_sz / comp_sz;
|
||||
|
|
|
|||
|
|
@ -202,88 +202,162 @@ struct mir_op_props alu_opcode_props[256] = {
|
|||
#define M64 midgard_reg_mode_64
|
||||
|
||||
struct mir_ldst_op_props load_store_opcode_props[256] = {
|
||||
[midgard_op_unpack_colour] = {"unpack_colour", M32},
|
||||
[midgard_op_pack_colour] = {"pack_colour", M32},
|
||||
[midgard_op_pack_colour_32] = {"pack_colour_32", M32},
|
||||
[midgard_op_lea_tex] = {"lea_tex", M32},
|
||||
[midgard_op_ld_cubemap_coords] = {"ld_cubemap_coords", M32},
|
||||
[midgard_op_ld_compute_id] = {"ld_compute_id", M32},
|
||||
[midgard_op_ldst_perspective_division_z] = {"ldst_perspective_division_z", M32},
|
||||
[midgard_op_ldst_perspective_division_w] = {"ldst_perspective_division_w", M32},
|
||||
[midgard_op_unpack_colour_f32] = {"UNPACK.f32", M32},
|
||||
[midgard_op_unpack_colour_f16] = {"UNPACK.f16", M32},
|
||||
[midgard_op_unpack_colour_u32] = {"UNPACK.u32", M32},
|
||||
[midgard_op_unpack_colour_s32] = {"UNPACK.s32", M32},
|
||||
[midgard_op_pack_colour_f32] = {"PACK.f32", M32},
|
||||
[midgard_op_pack_colour_f16] = {"PACK.f16", M32},
|
||||
[midgard_op_pack_colour_u32] = {"PACK.u32", M32},
|
||||
[midgard_op_pack_colour_s32] = {"PACK.s32", M32},
|
||||
[midgard_op_lea] = {"LEA", M32 | LDST_ADDRESS },
|
||||
[midgard_op_lea_image] = {"LEA_IMAGE", M32 | LDST_ATTRIB },
|
||||
[midgard_op_ld_cubemap_coords] = {"CUBEMAP", M32},
|
||||
[midgard_op_ldst_mov] = {"LDST_MOV", M32},
|
||||
[midgard_op_ldst_perspective_div_y] = {"LDST_PERSPECTIVE_DIV_Y", M32},
|
||||
[midgard_op_ldst_perspective_div_z] = {"LDST_PERSPECTIVE_DIV_Z", M32},
|
||||
[midgard_op_ldst_perspective_div_w] = {"LDST_PERSPECTIVE_DIV_W", M32},
|
||||
|
||||
[midgard_op_atomic_add] = {"atomic_add", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_and] = {"atomic_and", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_or] = {"atomic_or", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xor] = {"atomic_xor", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imin] = {"atomic_imin", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umin] = {"atomic_umin", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imax] = {"atomic_imax", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umax] = {"atomic_umax", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xchg] = {"atomic_xchg", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_cmpxchg] = {"atomic_cmpxchg", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_add] = {"AADD.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_and] = {"AAND.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_or] = {"AOR.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xor] = {"AXOR.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imin] = {"AMIN.s32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umin] = {"AMIN.u32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imax] = {"AMAX.s32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umax] = {"AMAX.u32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xchg] = {"XCHG.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_cmpxchg] = {"CMPXCHG.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
|
||||
[midgard_op_atomic_add64] = {"atomic_add64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_and64] = {"atomic_and64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_or64] = {"atomic_or64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xor64] = {"atomic_xor64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imin64] = {"atomic_imin64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umin64] = {"atomic_umin64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imax64] = {"atomic_imax64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umax64] = {"atomic_umax64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xchg64] = {"atomic_xchg64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_cmpxchg64] = {"atomic_cmpxchg64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_add64] = {"AADD.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_and64] = {"AAND.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_or64] = {"AOR.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xor64] = {"AXOR.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imin64] = {"AMIN.s64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umin64] = {"AMIN.u64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imax64] = {"AMAX.s64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umax64] = {"AMAX.u64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xchg64] = {"XCHG.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_cmpxchg64] = {"CMPXCHG.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
|
||||
[midgard_op_ld_u8] = {"ld_u8", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_i8] = {"ld_i8", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_u16] = {"ld_u16", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_i16] = {"ld_i16", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_u32] = {"ld_u32", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_u64] = {"ld_u64", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_u128] = {"ld_u128", M32 | LDST_ADDRESS},
|
||||
[midgard_op_atomic_add_be] = {"AADD.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_and_be] = {"AAND.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_or_be] = {"AOR.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xor_be] = {"AXOR.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imin_be] = {"AMIN.s32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umin_be] = {"AMIN.u32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_imax_be] = {"AMAX.s32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_umax_be] = {"AMAX.u32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_xchg_be] = {"XCHG.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
[midgard_op_atomic_cmpxchg_be] = {"CMPXCHG.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
|
||||
[midgard_op_ld_attr_32] = {"ld_attr_32", M32},
|
||||
[midgard_op_ld_attr_32i] = {"ld_attr_32i", M32},
|
||||
[midgard_op_ld_attr_32u] = {"ld_attr_32u", M32},
|
||||
[midgard_op_ld_attr_16] = {"ld_attr_16", M32},
|
||||
/* Big-endian 64-bit atomics. These entries must be indexed by the *64_be
 * opcodes: indexing them by the plain *64 opcodes repeats a designated
 * initializer, which overrides the little-endian "AADD.64"-style names given
 * earlier in this table and leaves the *64_be slots zero-initialised. */
[midgard_op_atomic_add64_be] = {"AADD.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
[midgard_op_atomic_and64_be] = {"AAND.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
[midgard_op_atomic_or64_be] = {"AOR.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
[midgard_op_atomic_xor64_be] = {"AXOR.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
[midgard_op_atomic_imin64_be] = {"AMIN.s64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
[midgard_op_atomic_umin64_be] = {"AMIN.u64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
[midgard_op_atomic_imax64_be] = {"AMAX.s64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
[midgard_op_atomic_umax64_be] = {"AMAX.u64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
[midgard_op_atomic_xchg64_be] = {"XCHG.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
[midgard_op_atomic_cmpxchg64_be] = {"CMPXCHG.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC},
|
||||
|
||||
[midgard_op_ld_vary_32] = {"ld_vary_32", M32},
|
||||
[midgard_op_ld_vary_16] = {"ld_vary_16", M32},
|
||||
[midgard_op_ld_vary_32i] = {"ld_vary_32i", M32},
|
||||
[midgard_op_ld_vary_32u] = {"ld_vary_32u", M32},
|
||||
[midgard_op_ld_u8] = {"LD.u8", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_i8] = {"LD.s8", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_u16] = {"LD.u16", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_i16] = {"LD.s16", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_u16_be] = {"LD.u16.be", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_i16_be] = {"LD.s16.be", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_32] = {"LD.32", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_32_bswap2] = {"LD.32.bswap2", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_32_bswap4] = {"LD.32.bswap4", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_64] = {"LD.64", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_64_bswap2] = {"LD.64.bswap2", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_64_bswap4] = {"LD.64.bswap4", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_64_bswap8] = {"LD.64.bswap8", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_128] = {"LD.128", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_128_bswap2] = {"LD.128.bswap2", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_128_bswap4] = {"LD.128.bswap4", M32 | LDST_ADDRESS},
|
||||
[midgard_op_ld_128_bswap8] = {"LD.128.bswap8", M32 | LDST_ADDRESS},
|
||||
|
||||
[midgard_op_ld_color_buffer_32u] = {"ld_color_buffer_32u", M32},
|
||||
[midgard_op_ld_color_buffer_32u_old] = {"ld_color_buffer_32u_old", M32},
|
||||
[midgard_op_ld_color_buffer_as_fp16] = {"ld_color_buffer_as_fp16", M16},
|
||||
[midgard_op_ld_color_buffer_as_fp32] = {"ld_color_buffer_as_fp32", M32},
|
||||
[midgard_op_ld_color_buffer_as_fp16_old] = {"ld_color_buffer_as_fp16_old", M16 | LDST_SPECIAL_MASK},
|
||||
[midgard_op_ld_color_buffer_as_fp32_old] = {"ld_color_buffer_as_fp32_old", M32 | LDST_SPECIAL_MASK},
|
||||
[midgard_op_ld_attr_32] = {"LD_ATTR.f32", M32 | LDST_ATTRIB},
|
||||
[midgard_op_ld_attr_32i] = {"LD_ATTR.s32", M32 | LDST_ATTRIB},
|
||||
[midgard_op_ld_attr_32u] = {"LD_ATTR.u32", M32 | LDST_ATTRIB},
|
||||
[midgard_op_ld_attr_16] = {"LD_ATTR.f16", M32 | LDST_ATTRIB},
|
||||
|
||||
[midgard_op_ld_ubo_u8] = {"ld_ubo_u8", M32},
|
||||
[midgard_op_ld_ubo_u16] = {"ld_ubo_u16", M16},
|
||||
[midgard_op_ld_ubo_u32] = {"ld_ubo_u32", M32},
|
||||
[midgard_op_ld_ubo_u64] = {"ld_ubo_u64", M32},
|
||||
[midgard_op_ld_ubo_u128] = {"ld_ubo_u128", M32},
|
||||
[midgard_op_ld_vary_32] = {"LD_VARY.f32", M32 | LDST_ATTRIB},
|
||||
[midgard_op_ld_vary_16] = {"LD_VARY.f16", M32 | LDST_ATTRIB},
|
||||
[midgard_op_ld_vary_32i] = {"LD_VARY.s32", M32 | LDST_ATTRIB},
|
||||
[midgard_op_ld_vary_32u] = {"LD_VARY.u32", M32 | LDST_ATTRIB},
|
||||
|
||||
[midgard_op_ld_image_32f] = {"ld_image_32f", M32},
|
||||
[midgard_op_ld_image_16f] = {"ld_image_16f", M16},
|
||||
[midgard_op_ld_image_32i] = {"ld_image_32i", M32},
|
||||
[midgard_op_ld_image_32u] = {"ld_image_32u", M32},
|
||||
[midgard_op_ld_special_32f] = {"LD_SPECIAL.f32", M32 | LDST_SPECIAL_MASK},
|
||||
[midgard_op_ld_special_16f] = {"LD_SPECIAL.f16", M16 | LDST_SPECIAL_MASK},
|
||||
[midgard_op_ld_special_32u] = {"LD_SPECIAL.u32", M32},
|
||||
[midgard_op_ld_special_32i] = {"LD_SPECIAL.s32", M32},
|
||||
|
||||
[midgard_op_st_u8] = {"st_u8", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_u16] = {"st_u16", M16 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_u32] = {"st_u32", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_u64] = {"st_u64", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_u128] = {"st_u128", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_ld_tilebuffer_32f] = {"LD_TILEBUFFER.f32", M32},
|
||||
[midgard_op_ld_tilebuffer_16f] = {"LD_TILEBUFFER.f16", M16},
|
||||
[midgard_op_ld_tilebuffer_raw] = {"LD_TILEBUFFER.raw", M32},
|
||||
|
||||
[midgard_op_st_vary_32] = {"st_vary_32", M32 | LDST_STORE},
|
||||
[midgard_op_st_vary_32i] = {"st_vary_32i", M32 | LDST_STORE},
|
||||
[midgard_op_st_vary_32u] = {"st_vary_32u", M32 | LDST_STORE},
|
||||
[midgard_op_st_vary_16] = {"st_vary_16", M16 | LDST_STORE},
|
||||
[midgard_op_ld_ubo_u8] = {"LD_UBO.u8", M32},
|
||||
[midgard_op_ld_ubo_i8] = {"LD_UBO.s8", M32},
|
||||
[midgard_op_ld_ubo_u16] = {"LD_UBO.u16", M16},
|
||||
[midgard_op_ld_ubo_i16] = {"LD_UBO.s16", M16},
|
||||
[midgard_op_ld_ubo_u16_be] = {"LD_UBO.u16.be", M16},
|
||||
[midgard_op_ld_ubo_i16_be] = {"LD_UBO.s16.be", M16},
|
||||
[midgard_op_ld_ubo_32] = {"LD_UBO.32", M32},
|
||||
[midgard_op_ld_ubo_32_bswap2] = {"LD_UBO.32.bswap2", M32},
|
||||
[midgard_op_ld_ubo_32_bswap4] = {"LD_UBO.32.bswap4", M32},
|
||||
[midgard_op_ld_ubo_64] = {"LD_UBO.64", M32},
|
||||
[midgard_op_ld_ubo_64_bswap2] = {"LD_UBO.64.bswap2", M32},
|
||||
[midgard_op_ld_ubo_64_bswap4] = {"LD_UBO.64.bswap4", M32},
|
||||
[midgard_op_ld_ubo_64_bswap8] = {"LD_UBO.64.bswap8", M32},
|
||||
[midgard_op_ld_ubo_128] = {"LD_UBO.128", M32},
|
||||
[midgard_op_ld_ubo_128_bswap2] = {"LD_UBO.128.bswap2", M32},
|
||||
[midgard_op_ld_ubo_128_bswap4] = {"LD_UBO.128.bswap4", M32},
|
||||
[midgard_op_ld_ubo_128_bswap8] = {"LD_UBO.128.bswap8", M32},
|
||||
|
||||
[midgard_op_st_image_32f] = {"st_image_32f", M32 | LDST_STORE},
|
||||
[midgard_op_st_image_16f] = {"st_image_16f", M16 | LDST_STORE},
|
||||
[midgard_op_st_image_32i] = {"st_image_32i", M32 | LDST_STORE},
|
||||
[midgard_op_st_image_32u] = {"st_image_32u", M32 | LDST_STORE},
|
||||
[midgard_op_ld_image_32f] = {"LD_IMAGE.f32", M32 | LDST_ATTRIB},
|
||||
[midgard_op_ld_image_16f] = {"LD_IMAGE.f16", M16 | LDST_ATTRIB},
|
||||
[midgard_op_ld_image_32i] = {"LD_IMAGE.s32", M32 | LDST_ATTRIB},
|
||||
[midgard_op_ld_image_32u] = {"LD_IMAGE.u32", M32 | LDST_ATTRIB},
|
||||
|
||||
[midgard_op_st_u8] = {"ST.u8", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_i8] = {"ST.s8", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_u16] = {"ST.u16", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_i16] = {"ST.s16", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_u16_be] = {"ST.u16.be", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_i16_be] = {"ST.s16.be", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_32] = {"ST.32", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_32_bswap2] = {"ST.32.bswap2", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_32_bswap4] = {"ST.32.bswap4", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_64] = {"ST.64", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_64_bswap2] = {"ST.64.bswap2", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_64_bswap4] = {"ST.64.bswap4", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_64_bswap8] = {"ST.64.bswap8", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_128] = {"ST.128", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_128_bswap2] = {"ST.128.bswap2", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_128_bswap4] = {"ST.128.bswap4", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
[midgard_op_st_128_bswap8] = {"ST.128.bswap8", M32 | LDST_STORE | LDST_ADDRESS},
|
||||
|
||||
[midgard_op_st_vary_32] = {"ST_VARY.f32", M32 | LDST_STORE | LDST_ATTRIB},
|
||||
[midgard_op_st_vary_32i] = {"ST_VARY.s32", M32 | LDST_STORE | LDST_ATTRIB},
|
||||
[midgard_op_st_vary_32u] = {"ST_VARY.u32", M32 | LDST_STORE | LDST_ATTRIB},
|
||||
[midgard_op_st_vary_16] = {"ST_VARY.f16", M16 | LDST_STORE | LDST_ATTRIB},
|
||||
|
||||
[midgard_op_st_image_32f] = {"ST_IMAGE.f32", M32 | LDST_STORE | LDST_ATTRIB},
|
||||
[midgard_op_st_image_16f] = {"ST_IMAGE.f16", M16 | LDST_STORE | LDST_ATTRIB},
|
||||
/* Mnemonic suffix mirrors the opcode suffix: 32i is signed (.s32) and 32u is
 * unsigned (.u32), as in the LD_IMAGE and ST_VARY entries of this table. */
[midgard_op_st_image_32i] = {"ST_IMAGE.s32", M32 | LDST_STORE | LDST_ATTRIB},
|
||||
[midgard_op_st_image_32u] = {"ST_IMAGE.u32", M32 | LDST_STORE | LDST_ATTRIB},
|
||||
|
||||
[midgard_op_st_special_32f] = {"ST_SPECIAL.f32", M32},
|
||||
[midgard_op_st_special_16f] = {"ST_SPECIAL.f16", M16},
|
||||
[midgard_op_st_special_32u] = {"ST_SPECIAL.u32", M32},
|
||||
[midgard_op_st_special_32i] = {"ST_SPECIAL.s32", M32},
|
||||
|
||||
[midgard_op_st_tilebuffer_32f] = {"ST_TILEBUFFER.f32", M32},
|
||||
[midgard_op_st_tilebuffer_16f] = {"ST_TILEBUFFER.f16", M16},
|
||||
[midgard_op_st_tilebuffer_raw] = {"ST_TILEBUFFER.raw", M32},
|
||||
};
|
||||
|
||||
#undef M8
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ extern struct mir_ldst_op_props load_store_opcode_props[256];
|
|||
extern struct mir_tag_props midgard_tag_props[16];
|
||||
|
||||
/* Non-zero when the load_store_opcode_props entry for `op` has LDST_ATOMIC set.
 * Yields the masked flag bit itself, not a normalized 0/1. */
#define OP_IS_ATOMIC(op) ((LDST_ATOMIC) & load_store_opcode_props[(op)].props)
|
||||
/* Non-zero when the load_store_opcode_props entry for `op` has LDST_ATTRIB set.
 * Yields the masked flag bit itself, not a normalized 0/1. */
#define OP_USES_ATTRIB(op) ((LDST_ATTRIB) & load_store_opcode_props[(op)].props)
|
||||
/* Non-zero when the load_store_opcode_props entry for `op` has LDST_STORE set.
 * Yields the masked flag bit itself, not a normalized 0/1. */
#define OP_IS_STORE(op) ((LDST_STORE) & load_store_opcode_props[(op)].props)
|
||||
/* Non-zero when the load_store_opcode_props entry for `op` has LDST_ADDRESS set.
 * Yields the masked flag bit itself, not a normalized 0/1. */
#define OP_HAS_ADDRESS(op) ((LDST_ADDRESS) & load_store_opcode_props[(op)].props)
|
||||
|
||||
|
|
|
|||
|
|
@ -122,8 +122,8 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block)
|
|||
.src_types = { nir_type_float32 },
|
||||
.swizzle = SWIZZLE_IDENTITY_4,
|
||||
.op = frcp_component == COMPONENT_W ?
|
||||
midgard_op_ldst_perspective_division_w :
|
||||
midgard_op_ldst_perspective_division_z,
|
||||
midgard_op_ldst_perspective_div_w :
|
||||
midgard_op_ldst_perspective_div_z,
|
||||
.load_store = {
|
||||
.arg_1 = 0x20
|
||||
}
|
||||
|
|
@ -175,7 +175,7 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block)
|
|||
break;
|
||||
|
||||
bool projects_w =
|
||||
ins->op == midgard_op_ldst_perspective_division_w;
|
||||
ins->op == midgard_op_ldst_perspective_div_w;
|
||||
|
||||
p.modifier = projects_w ?
|
||||
midgard_varying_mod_perspective_w :
|
||||
|
|
|
|||
|
|
@ -986,7 +986,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
|
|||
.dest_type = ins->src_types[i],
|
||||
.src = { ~0, ~0, ~0, ~0 },
|
||||
.swizzle = SWIZZLE_IDENTITY_4,
|
||||
.op = midgard_op_ld_ubo_u128,
|
||||
.op = midgard_op_ld_ubo_128,
|
||||
.load_store = {
|
||||
.arg_1 = ctx->info->push.words[idx].ubo,
|
||||
.arg_2 = 0x1E,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue