From d2119073de1b7f2a6d5de3f1b262a1d635482ce7 Mon Sep 17 00:00:00 2001 From: Italo Nicola Date: Fri, 16 Apr 2021 10:28:48 +0000 Subject: [PATCH] pan/mdg: improve ldst opcode names and add missing ops Signed-off-by: Italo Nicola Reviewed-by: Alyssa Rosenzweig Part-of: --- src/panfrost/midgard/compiler.h | 2 +- src/panfrost/midgard/helpers.h | 41 +++- src/panfrost/midgard/midgard.h | 170 ++++++++++---- src/panfrost/midgard/midgard_compile.c | 64 +++--- src/panfrost/midgard/midgard_emit.c | 10 +- src/panfrost/midgard/midgard_ops.c | 216 ++++++++++++------ src/panfrost/midgard/midgard_ops.h | 1 + .../midgard/midgard_opt_perspective.c | 6 +- src/panfrost/midgard/midgard_ra.c | 2 +- 9 files changed, 343 insertions(+), 169 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index ab699b5f019..5a5623c2cd3 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -570,7 +570,7 @@ v_load_store_scratch( .dest = ~0, .src = { ~0, ~0, ~0, ~0 }, .swizzle = SWIZZLE_IDENTITY_4, - .op = is_store ? midgard_op_st_u128 : midgard_op_ld_u128, + .op = is_store ? 
midgard_op_st_128 : midgard_op_ld_128, .load_store = { /* For register spilling - to thread local storage */ .arg_1 = 0xEA, diff --git a/src/panfrost/midgard/helpers.h b/src/panfrost/midgard/helpers.h index fa882f07824..fffe0d21142 100644 --- a/src/panfrost/midgard/helpers.h +++ b/src/panfrost/midgard/helpers.h @@ -33,8 +33,9 @@ ) #define OP_IS_PROJECTION(op) ( \ - op == midgard_op_ldst_perspective_division_z || \ - op == midgard_op_ldst_perspective_division_w \ + op == midgard_op_ldst_perspective_div_y || \ + op == midgard_op_ldst_perspective_div_z || \ + op == midgard_op_ldst_perspective_div_w \ ) #define OP_IS_VEC4_ONLY(op) ( \ @@ -43,16 +44,13 @@ ) #define OP_IS_MOVE(op) ( \ - op == midgard_alu_op_fmov || \ + (op >= midgard_alu_op_fmov && op <= midgard_alu_op_fmov_rtp) || \ op == midgard_alu_op_imov \ ) #define OP_IS_UBO_READ(op) ( \ - op == midgard_op_ld_ubo_u8 || \ - op == midgard_op_ld_ubo_u16 || \ - op == midgard_op_ld_ubo_u32 || \ - op == midgard_op_ld_ubo_u64 || \ - op == midgard_op_ld_ubo_u128 \ + op >= midgard_op_ld_ubo_u8 && \ + op <= midgard_op_ld_ubo_128_bswap8 \ ) #define OP_IS_CSEL_V(op) ( \ @@ -81,7 +79,32 @@ #define OP_IS_COMMON_STORE(op) ( \ op >= midgard_op_st_u8 && \ - op <= midgard_op_st_u128 \ + op <= midgard_op_st_128_bswap8 \ + ) + +#define OP_IS_IMAGE(op) ( \ + (op >= midgard_op_ld_image_32f && op <= midgard_op_ld_image_32i) || \ + (op >= midgard_op_st_image_32f && op <= midgard_op_st_image_32i) || \ + op == midgard_op_lea_image \ + ) + +#define OP_IS_SPECIAL(op) ( \ + (op >= midgard_op_ld_special_32f && op <= midgard_op_ld_special_32i) || \ + (op >= midgard_op_st_special_32f && op <= midgard_op_st_special_32i) \ + ) + +#define OP_IS_PACK_COLOUR(op) ( \ + (op >= midgard_op_pack_colour_f32 && op <= midgard_op_pack_colour_s32) \ + ) + +#define OP_IS_UNPACK_COLOUR(op) ( \ + (op >= midgard_op_unpack_colour_f32 && op <= midgard_op_unpack_colour_s32) \ + ) + +/* Instructions that are on the load/store unit but don't access memory */ +#define 
OP_IS_REG2REG_LDST(op) ( \ + op >= midgard_op_unpack_colour_f32 && \ + op <= midgard_op_ldst_perspective_div_w \ ) /* ALU control words are single bit fields with a lot of space */ diff --git a/src/panfrost/midgard/midgard.h b/src/panfrost/midgard/midgard.h index 7683bb58a6e..e9559659c1c 100644 --- a/src/panfrost/midgard/midgard.h +++ b/src/panfrost/midgard/midgard.h @@ -458,17 +458,23 @@ midgard_writeout; typedef enum { midgard_op_ld_st_noop = 0x03, - /* Unpack a colour from a native format to fp16 */ - midgard_op_unpack_colour = 0x05, + /* Unpacks a colour from a native format to <format> */ + midgard_op_unpack_colour_f32 = 0x04, + midgard_op_unpack_colour_f16 = 0x05, + midgard_op_unpack_colour_u32 = 0x06, + midgard_op_unpack_colour_s32 = 0x07, - /* Packs a colour from fp16 to a native format */ - midgard_op_pack_colour = 0x09, + /* Packs a colour from <format> to a native format */ + midgard_op_pack_colour_f32 = 0x08, + midgard_op_pack_colour_f16 = 0x09, + midgard_op_pack_colour_u32 = 0x0A, + midgard_op_pack_colour_s32 = 0x0B, - /* Likewise packs from fp32 */ - midgard_op_pack_colour_32 = 0x0A, + /* Computes the effective address of a mem address expression */ + midgard_op_lea = 0x0C, - /* Converts image/tex coordinates into mem address */ - midgard_op_lea_tex = 0x0D, + /* Converts image coordinates into mem address */ + midgard_op_lea_image = 0x0D, /* Unclear why this is on the L/S unit, but moves fp32 cube map * coordinates in r27 to its cube map texture coordinate destination @@ -476,52 +482,83 @@ typedef enum { midgard_op_ld_cubemap_coords = 0x0E, - /* Loads a global/local/group ID, depending on arguments */ - midgard_op_ld_compute_id = 0x10, + /* A mov between registers that the ldst pipeline can access */ + midgard_op_ldst_mov = 0x10, /* The L/S unit can do perspective division a clock faster than the ALU * if you're lucky. Put the vec4 in r27, and call with 0x24 as the * unknown state; the output will be <x/w, y/w, z/w, 1>. 
Replace w with * z for the z version */ - midgard_op_ldst_perspective_division_z = 0x12, - midgard_op_ldst_perspective_division_w = 0x13, + midgard_op_ldst_perspective_div_y = 0x11, + midgard_op_ldst_perspective_div_z = 0x12, + midgard_op_ldst_perspective_div_w = 0x13, /* val in r27.y, address embedded, outputs result to argument. Invert val for sub. Let val = +-1 for inc/dec. */ midgard_op_atomic_add = 0x40, midgard_op_atomic_add64 = 0x41, + midgard_op_atomic_add_be = 0x42, + midgard_op_atomic_add64_be = 0x43, midgard_op_atomic_and = 0x44, midgard_op_atomic_and64 = 0x45, + midgard_op_atomic_and_be = 0x46, + midgard_op_atomic_and64_be = 0x47, midgard_op_atomic_or = 0x48, midgard_op_atomic_or64 = 0x49, + midgard_op_atomic_or_be = 0x4A, + midgard_op_atomic_or64_be = 0x4B, midgard_op_atomic_xor = 0x4C, midgard_op_atomic_xor64 = 0x4D, + midgard_op_atomic_xor_be = 0x4E, + midgard_op_atomic_xor64_be = 0x4F, midgard_op_atomic_imin = 0x50, midgard_op_atomic_imin64 = 0x51, + midgard_op_atomic_imin_be = 0x52, + midgard_op_atomic_imin64_be = 0x53, midgard_op_atomic_umin = 0x54, midgard_op_atomic_umin64 = 0x55, + midgard_op_atomic_umin_be = 0x56, + midgard_op_atomic_umin64_be = 0x57, midgard_op_atomic_imax = 0x58, midgard_op_atomic_imax64 = 0x59, + midgard_op_atomic_imax_be = 0x5A, + midgard_op_atomic_imax64_be = 0x5B, midgard_op_atomic_umax = 0x5C, midgard_op_atomic_umax64 = 0x5D, + midgard_op_atomic_umax_be = 0x5E, + midgard_op_atomic_umax64_be = 0x5F, midgard_op_atomic_xchg = 0x60, midgard_op_atomic_xchg64 = 0x61, + midgard_op_atomic_xchg_be = 0x62, + midgard_op_atomic_xchg64_be = 0x63, midgard_op_atomic_cmpxchg = 0x64, midgard_op_atomic_cmpxchg64 = 0x65, + midgard_op_atomic_cmpxchg_be = 0x66, + midgard_op_atomic_cmpxchg64_be = 0x67, - /* Used for compute shader's __global arguments, __local variables (or - * for register spilling) */ + /* Used for compute shader's __global arguments, __local + * variables (or for register spilling) */ - midgard_op_ld_u8 = 0x80, /* zero 
extends */ - midgard_op_ld_i8 = 0x81, /* sign extends */ - midgard_op_ld_u16 = 0x84, /* zero extends */ - midgard_op_ld_i16 = 0x85, /* sign extends */ - midgard_op_ld_u32 = 0x88, - midgard_op_ld_u64 = 0x8C, - midgard_op_ld_u128 = 0x90, + midgard_op_ld_u8 = 0x80, /* zero extends */ + midgard_op_ld_i8 = 0x81, /* sign extends */ + midgard_op_ld_u16 = 0x84, /* zero extends */ + midgard_op_ld_i16 = 0x85, /* sign extends */ + midgard_op_ld_u16_be = 0x86, /* zero extends, big endian */ + midgard_op_ld_i16_be = 0x87, /* sign extends, big endian */ + midgard_op_ld_32 = 0x88, /* short2, int, float */ + midgard_op_ld_32_bswap2 = 0x89, /* 16-bit big endian vector */ + midgard_op_ld_32_bswap4 = 0x8A, /* 32-bit big endian scalar */ + midgard_op_ld_64 = 0x8C, /* int2, float2, long */ + midgard_op_ld_64_bswap2 = 0x8D, /* 16-bit big endian vector */ + midgard_op_ld_64_bswap4 = 0x8E, /* 32-bit big endian vector */ + midgard_op_ld_64_bswap8 = 0x8F, /* 64-bit big endian scalar */ + midgard_op_ld_128 = 0x90, /* float4, long2 */ + midgard_op_ld_128_bswap2 = 0x91, /* 16-bit big endian vector */ + midgard_op_ld_128_bswap4 = 0x92, /* 32-bit big endian vector */ + midgard_op_ld_128_bswap8 = 0x93, /* 64-bit big endian vector */ midgard_op_ld_attr_32 = 0x94, midgard_op_ld_attr_16 = 0x95, @@ -532,40 +569,67 @@ typedef enum { midgard_op_ld_vary_32u = 0x9A, midgard_op_ld_vary_32i = 0x9B, - /* Old version of midgard_op_ld_color_buffer_as_fp16, for T720 */ - midgard_op_ld_color_buffer_as_fp32_old = 0x9C, - midgard_op_ld_color_buffer_as_fp16_old = 0x9D, - midgard_op_ld_color_buffer_32u_old = 0x9E, + /* This instruction behaves differently depending if the gpu is a v4 + * or a newer gpu. The main difference hinges on which values of the + * second argument are valid for each gpu. + * TODO: properly document and decode each possible value for the + * second argument. 
*/ + midgard_op_ld_special_32f = 0x9C, + midgard_op_ld_special_16f = 0x9D, + midgard_op_ld_special_32u = 0x9E, + midgard_op_ld_special_32i = 0x9F, - /* The distinction between these ops is the alignment requirement / - * accompanying shift. Thus, the offset to ld_ubo_int4 is in 16-byte - * units and can load 128-bit. The offset to ld_ubo_short4 is in 8-byte - * units; ld_ubo_char4 in 4-byte units. ld_ubo_char/ld_ubo_char2 are - * purely theoretical (never seen in the wild) since int8/int16/fp16 - * UBOs don't really exist. The ops are still listed to maintain - * symmetry with generic I/O ops. */ - - midgard_op_ld_ubo_u8 = 0xA0, /* theoretical */ - midgard_op_ld_ubo_u16 = 0xA4, /* theoretical */ - midgard_op_ld_ubo_u32 = 0xA8, - midgard_op_ld_ubo_u64 = 0xAC, - midgard_op_ld_ubo_u128 = 0xB0, + /* The distinction between these ops is the alignment + * requirement / accompanying shift. Thus, the offset to + * ld_ubo_128 is in 16-byte units and can load 128-bit. The + * offset to ld_ubo_64 is in 8-byte units; ld_ubo_32 in 4-byte + * units. */ + midgard_op_ld_ubo_u8 = 0xA0, /* theoretical */ + midgard_op_ld_ubo_i8 = 0xA1, /* theoretical */ + midgard_op_ld_ubo_u16 = 0xA4, /* theoretical */ + midgard_op_ld_ubo_i16 = 0xA5, /* theoretical */ + midgard_op_ld_ubo_u16_be = 0xA6, /* theoretical */ + midgard_op_ld_ubo_i16_be = 0xA7, /* theoretical */ + midgard_op_ld_ubo_32 = 0xA8, + midgard_op_ld_ubo_32_bswap2 = 0xA9, + midgard_op_ld_ubo_32_bswap4 = 0xAA, + midgard_op_ld_ubo_64 = 0xAC, + midgard_op_ld_ubo_64_bswap2 = 0xAD, + midgard_op_ld_ubo_64_bswap4 = 0xAE, + midgard_op_ld_ubo_64_bswap8 = 0xAF, + midgard_op_ld_ubo_128 = 0xB0, + midgard_op_ld_ubo_128_bswap2 = 0xB1, + midgard_op_ld_ubo_128_bswap4 = 0xB2, + midgard_op_ld_ubo_128_bswap8 = 0xB3, midgard_op_ld_image_32f = 0xB4, midgard_op_ld_image_16f = 0xB5, midgard_op_ld_image_32u = 0xB6, midgard_op_ld_image_32i = 0xB7, - /* New-style blending ops. 
Works on T760/T860 */ - midgard_op_ld_color_buffer_as_fp32 = 0xB8, - midgard_op_ld_color_buffer_as_fp16 = 0xB9, - midgard_op_ld_color_buffer_32u = 0xBA, + /* Only works on v5 or newer. + * Older cards must use ld_special with tilebuffer selectors. */ + midgard_op_ld_tilebuffer_32f = 0xB8, + midgard_op_ld_tilebuffer_16f = 0xB9, + midgard_op_ld_tilebuffer_raw = 0xBA, - midgard_op_st_u8 = 0xC0, - midgard_op_st_u16 = 0xC4, - midgard_op_st_u32 = 0xC8, - midgard_op_st_u64 = 0xCC, - midgard_op_st_u128 = 0xD0, + midgard_op_st_u8 = 0xC0, /* zero extends */ + midgard_op_st_i8 = 0xC1, /* sign extends */ + midgard_op_st_u16 = 0xC4, /* zero extends */ + midgard_op_st_i16 = 0xC5, /* sign extends */ + midgard_op_st_u16_be = 0xC6, /* zero extends, big endian */ + midgard_op_st_i16_be = 0xC7, /* sign extends, big endian */ + midgard_op_st_32 = 0xC8, /* short2, int, float */ + midgard_op_st_32_bswap2 = 0xC9, /* 16-bit big endian vector */ + midgard_op_st_32_bswap4 = 0xCA, /* 32-bit big endian scalar */ + midgard_op_st_64 = 0xCC, /* int2, float2, long */ + midgard_op_st_64_bswap2 = 0xCD, /* 16-bit big endian vector */ + midgard_op_st_64_bswap4 = 0xCE, /* 32-bit big endian vector */ + midgard_op_st_64_bswap8 = 0xCF, /* 64-bit big endian scalar */ + midgard_op_st_128 = 0xD0, /* float4, long2 */ + midgard_op_st_128_bswap2 = 0xD1, /* 16-bit big endian vector */ + midgard_op_st_128_bswap4 = 0xD2, /* 32-bit big endian vector */ + midgard_op_st_128_bswap8 = 0xD3, /* 64-bit big endian vector */ midgard_op_st_vary_32 = 0xD4, midgard_op_st_vary_16 = 0xD5, @@ -577,6 +641,18 @@ typedef enum { midgard_op_st_image_16f = 0xD9, midgard_op_st_image_32u = 0xDA, midgard_op_st_image_32i = 0xDB, + + midgard_op_st_special_32f = 0xDC, + midgard_op_st_special_16f = 0xDD, + midgard_op_st_special_32u = 0xDE, + midgard_op_st_special_32i = 0xDF, + + /* Only works on v5 or newer. + * Older cards must use ld_special with tilebuffer selectors. 
*/ + midgard_op_st_tilebuffer_32f = 0xE8, + midgard_op_st_tilebuffer_16f = 0xE9, + midgard_op_st_tilebuffer_raw = 0xEA, + midgard_op_trap = 0xFC, } midgard_load_store_op; typedef enum { diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 9256fd22ddc..9f945d0dc93 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -132,19 +132,19 @@ schedule_barrier(compiler_context *ctx) M_LOAD(ld_attr_32, nir_type_uint32); M_LOAD(ld_vary_32, nir_type_uint32); -M_LOAD(ld_ubo_u128, nir_type_uint32); -M_LOAD(ld_u32, nir_type_uint32); -M_LOAD(ld_u64, nir_type_uint32); -M_LOAD(ld_u128, nir_type_uint32); -M_STORE(st_u32, nir_type_uint32); -M_STORE(st_u64, nir_type_uint32); -M_STORE(st_u128, nir_type_uint32); -M_LOAD(ld_color_buffer_32u, nir_type_uint32); -M_LOAD(ld_color_buffer_as_fp16, nir_type_float16); -M_LOAD(ld_color_buffer_as_fp32, nir_type_float32); +M_LOAD(ld_ubo_128, nir_type_uint32); +M_LOAD(ld_32, nir_type_uint32); +M_LOAD(ld_64, nir_type_uint32); +M_LOAD(ld_128, nir_type_uint32); +M_STORE(st_32, nir_type_uint32); +M_STORE(st_64, nir_type_uint32); +M_STORE(st_128, nir_type_uint32); +M_LOAD(ld_tilebuffer_raw, nir_type_uint32); +M_LOAD(ld_tilebuffer_16f, nir_type_float16); +M_LOAD(ld_tilebuffer_32f, nir_type_float32); M_STORE(st_vary_32, nir_type_uint32); M_LOAD(ld_cubemap_coords, nir_type_uint32); -M_LOAD(ld_compute_id, nir_type_uint32); +M_LOAD(ldst_mov, nir_type_uint32); M_LOAD(ld_image_32f, nir_type_float32); M_LOAD(ld_image_16f, nir_type_float16); M_LOAD(ld_image_32u, nir_type_uint32); @@ -153,7 +153,7 @@ M_STORE(st_image_32f, nir_type_float32); M_STORE(st_image_16f, nir_type_float16); M_STORE(st_image_32u, nir_type_uint32); M_STORE(st_image_32i, nir_type_int32); -M_LOAD(lea_tex, nir_type_uint64); +M_LOAD(lea_image, nir_type_uint64); #define M_IMAGE(op) \ static midgard_instruction \ @@ -1155,7 +1155,7 @@ emit_ubo_read( { /* TODO: half-floats */ - midgard_instruction ins = 
m_ld_ubo_u128(dest, 0); + midgard_instruction ins = m_ld_ubo_128(dest, 0); ins.constants.u32[0] = offset; if (instr->type == nir_instr_type_intrinsic) @@ -1202,11 +1202,11 @@ emit_global( nir_dest_num_components(intr->dest); if (bitsize <= 32) - ins = m_ld_u32(srcdest, 0); + ins = m_ld_32(srcdest, 0); else if (bitsize <= 64) - ins = m_ld_u64(srcdest, 0); + ins = m_ld_64(srcdest, 0); else if (bitsize <= 128) - ins = m_ld_u128(srcdest, 0); + ins = m_ld_128(srcdest, 0); else unreachable("Invalid global read size"); } else { @@ -1214,11 +1214,11 @@ emit_global( nir_src_num_components(intr->src[0]); if (bitsize <= 32) - ins = m_st_u32(srcdest, 0); + ins = m_st_32(srcdest, 0); else if (bitsize <= 64) - ins = m_st_u64(srcdest, 0); + ins = m_st_64(srcdest, 0); else if (bitsize <= 128) - ins = m_st_u128(srcdest, 0); + ins = m_st_128(srcdest, 0); else unreachable("Invalid global store size"); } @@ -1241,7 +1241,7 @@ emit_global( /* If is_shared is off, the only other possible value are globals, since * SSBO's are being lowered to globals through a NIR pass. * `image_direct_address` should be ~0 when instr is not an image_atomic - * and the destination register of a lea_tex op when it is an image_atomic. */ + * and the destination register of a lea_image op when it is an image_atomic. */ static void emit_atomic( compiler_context *ctx, @@ -1370,7 +1370,7 @@ emit_varying_read( } -/* If `is_atomic` is true, we emit a `lea_tex` since midgard doesn't not have special +/* If `is_atomic` is true, we emit a `lea_image` since midgard doesn't have special * image_atomic opcodes. The caller can then use that address to emit a normal atomic opcode. 
*/ static midgard_instruction emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic) @@ -1405,9 +1405,9 @@ emit_image_op(compiler_context *ctx, nir_intrinsic_instr *instr, bool is_atomic) ins = st_image(type, val, address); nir_alu_type base_type = nir_alu_type_get_base_type(type); ins.src_types[0] = base_type | nir_src_bit_size(instr->src[3]); - } else if (is_atomic) { /* emit lea_tex */ + } else if (is_atomic) { /* emit lea_image */ unsigned dest = make_compiler_temp_reg(ctx); - ins = m_lea_tex(dest, address); + ins = m_lea_image(dest, address); ins.mask = mask_of(2); /* 64-bit memory address */ } else { /* emit ld_image_* */ nir_alu_type type = nir_intrinsic_dest_type(instr); @@ -1564,7 +1564,7 @@ static void emit_compute_builtin(compiler_context *ctx, nir_intrinsic_instr *instr) { unsigned reg = nir_dest_index(&instr->dest); - midgard_instruction ins = m_ld_compute_id(reg, 0); + midgard_instruction ins = m_ldst_mov(reg, 0); ins.mask = mask_of(3); ins.swizzle[0][3] = COMPONENT_X; /* xyzx */ ins.load_store.arg_1 = compute_builtin_arg(instr->intrinsic); @@ -1596,8 +1596,8 @@ emit_special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx) { unsigned reg = nir_dest_index(&instr->dest); - midgard_instruction ld = m_ld_color_buffer_32u(reg, 0); - ld.op = midgard_op_ld_color_buffer_32u_old; + midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); + ld.op = midgard_op_ld_special_32u; ld.load_store.address = idx; ld.load_store.arg_2 = 0x1E; @@ -1788,7 +1788,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) /* T720 and below use different blend opcodes with slightly * different semantics than T760 and up */ - midgard_instruction ld = m_ld_color_buffer_32u(reg, 0); + midgard_instruction ld = m_ld_tilebuffer_raw(reg, 0); ld.load_store.arg_2 = output_load_rt_addr(ctx, instr); @@ -1801,7 +1801,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) } if (ctx->quirks & MIDGARD_OLD_BLEND) { - ld.op = 
midgard_op_ld_color_buffer_32u_old; + ld.op = midgard_op_ld_special_32u; ld.load_store.address = 16; ld.load_store.arg_2 = 0x1E; } @@ -1817,9 +1817,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) midgard_instruction ld; if (bits == 16) - ld = m_ld_color_buffer_as_fp16(reg, 0); + ld = m_ld_tilebuffer_16f(reg, 0); else - ld = m_ld_color_buffer_as_fp32(reg, 0); + ld = m_ld_tilebuffer_32f(reg, 0); ld.load_store.arg_2 = output_load_rt_addr(ctx, instr); @@ -1828,9 +1828,9 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) if (ctx->quirks & MIDGARD_OLD_BLEND) { if (bits == 16) - ld.op = midgard_op_ld_color_buffer_as_fp16_old; + ld.op = midgard_op_ld_special_16f; else - ld.op = midgard_op_ld_color_buffer_as_fp32_old; + ld.op = midgard_op_ld_special_32f; ld.load_store.address = 1; ld.load_store.arg_2 = 0x1E; } diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c index 712d51d747e..47bd57e1018 100644 --- a/src/panfrost/midgard/midgard_emit.c +++ b/src/panfrost/midgard/midgard_emit.c @@ -436,13 +436,13 @@ midgard_pack_common_store_mask(midgard_instruction *ins) { } } break; - case midgard_op_st_u32: - case midgard_op_st_u64: - case midgard_op_st_u128: { + case midgard_op_st_32: + case midgard_op_st_64: + case midgard_op_st_128: { unsigned total_sz = 32; - if (ins->op == midgard_op_st_u128) + if (ins->op == midgard_op_st_128) total_sz = 128; - else if (ins->op == midgard_op_st_u64) + else if (ins->op == midgard_op_st_64) total_sz = 64; nr_comp = total_sz / comp_sz; diff --git a/src/panfrost/midgard/midgard_ops.c b/src/panfrost/midgard/midgard_ops.c index 4d265496a06..441aa0f99cb 100644 --- a/src/panfrost/midgard/midgard_ops.c +++ b/src/panfrost/midgard/midgard_ops.c @@ -202,88 +202,162 @@ struct mir_op_props alu_opcode_props[256] = { #define M64 midgard_reg_mode_64 struct mir_ldst_op_props load_store_opcode_props[256] = { - [midgard_op_unpack_colour] = {"unpack_colour", M32}, - [midgard_op_pack_colour] 
= {"pack_colour", M32}, - [midgard_op_pack_colour_32] = {"pack_colour_32", M32}, - [midgard_op_lea_tex] = {"lea_tex", M32}, - [midgard_op_ld_cubemap_coords] = {"ld_cubemap_coords", M32}, - [midgard_op_ld_compute_id] = {"ld_compute_id", M32}, - [midgard_op_ldst_perspective_division_z] = {"ldst_perspective_division_z", M32}, - [midgard_op_ldst_perspective_division_w] = {"ldst_perspective_division_w", M32}, + [midgard_op_unpack_colour_f32] = {"UNPACK.f32", M32}, + [midgard_op_unpack_colour_f16] = {"UNPACK.f16", M32}, + [midgard_op_unpack_colour_u32] = {"UNPACK.u32", M32}, + [midgard_op_unpack_colour_s32] = {"UNPACK.s32", M32}, + [midgard_op_pack_colour_f32] = {"PACK.f32", M32}, + [midgard_op_pack_colour_f16] = {"PACK.f16", M32}, + [midgard_op_pack_colour_u32] = {"PACK.u32", M32}, + [midgard_op_pack_colour_s32] = {"PACK.s32", M32}, + [midgard_op_lea] = {"LEA", M32 | LDST_ADDRESS }, + [midgard_op_lea_image] = {"LEA_IMAGE", M32 | LDST_ATTRIB }, + [midgard_op_ld_cubemap_coords] = {"CUBEMAP", M32}, + [midgard_op_ldst_mov] = {"LDST_MOV", M32}, + [midgard_op_ldst_perspective_div_y] = {"LDST_PERSPECTIVE_DIV_Y", M32}, + [midgard_op_ldst_perspective_div_z] = {"LDST_PERSPECTIVE_DIV_Z", M32}, + [midgard_op_ldst_perspective_div_w] = {"LDST_PERSPECTIVE_DIV_W", M32}, - [midgard_op_atomic_add] = {"atomic_add", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_and] = {"atomic_and", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_or] = {"atomic_or", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_xor] = {"atomic_xor", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_imin] = {"atomic_imin", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_umin] = {"atomic_umin", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_imax] = {"atomic_imax", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_umax] = {"atomic_umax", M32 | LDST_SIDE_FX | 
LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_xchg] = {"atomic_xchg", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_cmpxchg] = {"atomic_cmpxchg", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_add] = {"AADD.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_and] = {"AAND.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_or] = {"AOR.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_xor] = {"AXOR.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_imin] = {"AMIN.s32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_umin] = {"AMIN.u32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_imax] = {"AMAX.s32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_umax] = {"AMAX.u32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_xchg] = {"XCHG.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_cmpxchg] = {"CMPXCHG.32", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_add64] = {"atomic_add64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_and64] = {"atomic_and64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_or64] = {"atomic_or64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_xor64] = {"atomic_xor64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_imin64] = {"atomic_imin64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_umin64] = {"atomic_umin64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_imax64] = {"atomic_imax64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_umax64] = {"atomic_umax64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_xchg64] = {"atomic_xchg64", M64 | LDST_SIDE_FX | 
LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_atomic_cmpxchg64] = {"atomic_cmpxchg64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_add64] = {"AADD.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_and64] = {"AAND.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_or64] = {"AOR.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_xor64] = {"AXOR.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_imin64] = {"AMIN.s64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_umin64] = {"AMIN.u64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_imax64] = {"AMAX.s64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_umax64] = {"AMAX.u64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_xchg64] = {"XCHG.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_cmpxchg64] = {"CMPXCHG.64", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_ld_u8] = {"ld_u8", M32 | LDST_ADDRESS}, - [midgard_op_ld_i8] = {"ld_i8", M32 | LDST_ADDRESS}, - [midgard_op_ld_u16] = {"ld_u16", M32 | LDST_ADDRESS}, - [midgard_op_ld_i16] = {"ld_i16", M32 | LDST_ADDRESS}, - [midgard_op_ld_u32] = {"ld_u32", M32 | LDST_ADDRESS}, - [midgard_op_ld_u64] = {"ld_u64", M32 | LDST_ADDRESS}, - [midgard_op_ld_u128] = {"ld_u128", M32 | LDST_ADDRESS}, + [midgard_op_atomic_add_be] = {"AADD.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_and_be] = {"AAND.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_or_be] = {"AOR.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_xor_be] = {"AXOR.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_imin_be] = {"AMIN.s32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_umin_be] = {"AMIN.u32.be", M32 | 
LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_imax_be] = {"AMAX.s32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_umax_be] = {"AMAX.u32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_xchg_be] = {"XCHG.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_cmpxchg_be] = {"CMPXCHG.32.be", M32 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_ld_attr_32] = {"ld_attr_32", M32}, - [midgard_op_ld_attr_32i] = {"ld_attr_32i", M32}, - [midgard_op_ld_attr_32u] = {"ld_attr_32u", M32}, - [midgard_op_ld_attr_16] = {"ld_attr_16", M32}, + [midgard_op_atomic_add64_be] = {"AADD.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_and64_be] = {"AAND.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_or64_be] = {"AOR.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_xor64_be] = {"AXOR.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_imin64_be] = {"AMIN.s64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_umin64_be] = {"AMIN.u64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_imax64_be] = {"AMAX.s64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_umax64_be] = {"AMAX.u64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_xchg64_be] = {"XCHG.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, + [midgard_op_atomic_cmpxchg64_be] = {"CMPXCHG.64.be", M64 | LDST_SIDE_FX | LDST_ADDRESS | LDST_ATOMIC}, - [midgard_op_ld_vary_32] = {"ld_vary_32", M32}, - [midgard_op_ld_vary_16] = {"ld_vary_16", M32}, - [midgard_op_ld_vary_32i] = {"ld_vary_32i", M32}, - [midgard_op_ld_vary_32u] = {"ld_vary_32u", M32}, + [midgard_op_ld_u8] = {"LD.u8", M32 | LDST_ADDRESS}, + [midgard_op_ld_i8] = {"LD.s8", M32 | LDST_ADDRESS}, + [midgard_op_ld_u16] = {"LD.u16", M32 | LDST_ADDRESS}, + [midgard_op_ld_i16] = 
{"LD.s16", M32 | LDST_ADDRESS}, + [midgard_op_ld_u16_be] = {"LD.u16.be", M32 | LDST_ADDRESS}, + [midgard_op_ld_i16_be] = {"LD.s16.be", M32 | LDST_ADDRESS}, + [midgard_op_ld_32] = {"LD.32", M32 | LDST_ADDRESS}, + [midgard_op_ld_32_bswap2] = {"LD.32.bswap2", M32 | LDST_ADDRESS}, + [midgard_op_ld_32_bswap4] = {"LD.32.bswap4", M32 | LDST_ADDRESS}, + [midgard_op_ld_64] = {"LD.64", M32 | LDST_ADDRESS}, + [midgard_op_ld_64_bswap2] = {"LD.64.bswap2", M32 | LDST_ADDRESS}, + [midgard_op_ld_64_bswap4] = {"LD.64.bswap4", M32 | LDST_ADDRESS}, + [midgard_op_ld_64_bswap8] = {"LD.64.bswap8", M32 | LDST_ADDRESS}, + [midgard_op_ld_128] = {"LD.128", M32 | LDST_ADDRESS}, + [midgard_op_ld_128_bswap2] = {"LD.128.bswap2", M32 | LDST_ADDRESS}, + [midgard_op_ld_128_bswap4] = {"LD.128.bswap4", M32 | LDST_ADDRESS}, + [midgard_op_ld_128_bswap8] = {"LD.128.bswap8", M32 | LDST_ADDRESS}, - [midgard_op_ld_color_buffer_32u] = {"ld_color_buffer_32u", M32}, - [midgard_op_ld_color_buffer_32u_old] = {"ld_color_buffer_32u_old", M32}, - [midgard_op_ld_color_buffer_as_fp16] = {"ld_color_buffer_as_fp16", M16}, - [midgard_op_ld_color_buffer_as_fp32] = {"ld_color_buffer_as_fp32", M32}, - [midgard_op_ld_color_buffer_as_fp16_old] = {"ld_color_buffer_as_fp16_old", M16 | LDST_SPECIAL_MASK}, - [midgard_op_ld_color_buffer_as_fp32_old] = {"ld_color_buffer_as_fp32_old", M32 | LDST_SPECIAL_MASK}, + [midgard_op_ld_attr_32] = {"LD_ATTR.f32", M32 | LDST_ATTRIB}, + [midgard_op_ld_attr_32i] = {"LD_ATTR.s32", M32 | LDST_ATTRIB}, + [midgard_op_ld_attr_32u] = {"LD_ATTR.u32", M32 | LDST_ATTRIB}, + [midgard_op_ld_attr_16] = {"LD_ATTR.f16", M32 | LDST_ATTRIB}, - [midgard_op_ld_ubo_u8] = {"ld_ubo_u8", M32}, - [midgard_op_ld_ubo_u16] = {"ld_ubo_u16", M16}, - [midgard_op_ld_ubo_u32] = {"ld_ubo_u32", M32}, - [midgard_op_ld_ubo_u64] = {"ld_ubo_u64", M32}, - [midgard_op_ld_ubo_u128] = {"ld_ubo_u128", M32}, + [midgard_op_ld_vary_32] = {"LD_VARY.f32", M32 | LDST_ATTRIB}, + [midgard_op_ld_vary_16] = {"LD_VARY.f16", M32 | LDST_ATTRIB}, 
+ [midgard_op_ld_vary_32i] = {"LD_VARY.s32", M32 | LDST_ATTRIB}, + [midgard_op_ld_vary_32u] = {"LD_VARY.u32", M32 | LDST_ATTRIB}, - [midgard_op_ld_image_32f] = {"ld_image_32f", M32}, - [midgard_op_ld_image_16f] = {"ld_image_16f", M16}, - [midgard_op_ld_image_32i] = {"ld_image_32i", M32}, - [midgard_op_ld_image_32u] = {"ld_image_32u", M32}, + [midgard_op_ld_special_32f] = {"LD_SPECIAL.f32", M32 | LDST_SPECIAL_MASK}, + [midgard_op_ld_special_16f] = {"LD_SPECIAL.f16", M16 | LDST_SPECIAL_MASK}, + [midgard_op_ld_special_32u] = {"LD_SPECIAL.u32", M32}, + [midgard_op_ld_special_32i] = {"LD_SPECIAL.s32", M32}, - [midgard_op_st_u8] = {"st_u8", M32 | LDST_STORE | LDST_ADDRESS}, - [midgard_op_st_u16] = {"st_u16", M16 | LDST_STORE | LDST_ADDRESS}, - [midgard_op_st_u32] = {"st_u32", M32 | LDST_STORE | LDST_ADDRESS}, - [midgard_op_st_u64] = {"st_u64", M32 | LDST_STORE | LDST_ADDRESS}, - [midgard_op_st_u128] = {"st_u128", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_ld_tilebuffer_32f] = {"LD_TILEBUFFER.f32", M32}, + [midgard_op_ld_tilebuffer_16f] = {"LD_TILEBUFFER.f16", M16}, + [midgard_op_ld_tilebuffer_raw] = {"LD_TILEBUFFER.raw", M32}, - [midgard_op_st_vary_32] = {"st_vary_32", M32 | LDST_STORE}, - [midgard_op_st_vary_32i] = {"st_vary_32i", M32 | LDST_STORE}, - [midgard_op_st_vary_32u] = {"st_vary_32u", M32 | LDST_STORE}, - [midgard_op_st_vary_16] = {"st_vary_16", M16 | LDST_STORE}, + [midgard_op_ld_ubo_u8] = {"LD_UBO.u8", M32}, + [midgard_op_ld_ubo_i8] = {"LD_UBO.s8", M32}, + [midgard_op_ld_ubo_u16] = {"LD_UBO.u16", M16}, + [midgard_op_ld_ubo_i16] = {"LD_UBO.s16", M16}, + [midgard_op_ld_ubo_u16_be] = {"LD_UBO.u16.be", M16}, + [midgard_op_ld_ubo_i16_be] = {"LD_UBO.s16.be", M16}, + [midgard_op_ld_ubo_32] = {"LD_UBO.32", M32}, + [midgard_op_ld_ubo_32_bswap2] = {"LD_UBO.32.bswap2", M32}, + [midgard_op_ld_ubo_32_bswap4] = {"LD_UBO.32.bswap4", M32}, + [midgard_op_ld_ubo_64] = {"LD_UBO.64", M32}, + [midgard_op_ld_ubo_64_bswap2] = {"LD_UBO.64.bswap2", M32}, + 
[midgard_op_ld_ubo_64_bswap4] = {"LD_UBO.64.bswap4", M32}, + [midgard_op_ld_ubo_64_bswap8] = {"LD_UBO.64.bswap8", M32}, + [midgard_op_ld_ubo_128] = {"LD_UBO.128", M32}, + [midgard_op_ld_ubo_128_bswap2] = {"LD_UBO.128.bswap2", M32}, + [midgard_op_ld_ubo_128_bswap4] = {"LD_UBO.128.bswap4", M32}, + [midgard_op_ld_ubo_128_bswap8] = {"LD_UBO.128.bswap8", M32}, - [midgard_op_st_image_32f] = {"st_image_32f", M32 | LDST_STORE}, - [midgard_op_st_image_16f] = {"st_image_16f", M16 | LDST_STORE}, - [midgard_op_st_image_32i] = {"st_image_32i", M32 | LDST_STORE}, - [midgard_op_st_image_32u] = {"st_image_32u", M32 | LDST_STORE}, + [midgard_op_ld_image_32f] = {"LD_IMAGE.f32", M32 | LDST_ATTRIB}, + [midgard_op_ld_image_16f] = {"LD_IMAGE.f16", M16 | LDST_ATTRIB}, + [midgard_op_ld_image_32i] = {"LD_IMAGE.s32", M32 | LDST_ATTRIB}, + [midgard_op_ld_image_32u] = {"LD_IMAGE.u32", M32 | LDST_ATTRIB}, + + [midgard_op_st_u8] = {"ST.u8", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_i8] = {"ST.s8", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_u16] = {"ST.u16", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_i16] = {"ST.s16", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_u16_be] = {"ST.u16.be", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_i16_be] = {"ST.s16.be", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_32] = {"ST.32", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_32_bswap2] = {"ST.32.bswap2", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_32_bswap4] = {"ST.32.bswap4", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_64] = {"ST.64", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_64_bswap2] = {"ST.64.bswap2", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_64_bswap4] = {"ST.64.bswap4", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_64_bswap8] = {"ST.64.bswap8", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_128] = {"ST.128", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_128_bswap2] = {"ST.128.bswap2", M32 | LDST_STORE | 
LDST_ADDRESS}, + [midgard_op_st_128_bswap4] = {"ST.128.bswap4", M32 | LDST_STORE | LDST_ADDRESS}, + [midgard_op_st_128_bswap8] = {"ST.128.bswap8", M32 | LDST_STORE | LDST_ADDRESS}, + + [midgard_op_st_vary_32] = {"ST_VARY.f32", M32 | LDST_STORE | LDST_ATTRIB}, + [midgard_op_st_vary_32i] = {"ST_VARY.s32", M32 | LDST_STORE | LDST_ATTRIB}, + [midgard_op_st_vary_32u] = {"ST_VARY.u32", M32 | LDST_STORE | LDST_ATTRIB}, + [midgard_op_st_vary_16] = {"ST_VARY.f16", M16 | LDST_STORE | LDST_ATTRIB}, + + [midgard_op_st_image_32f] = {"ST_IMAGE.f32", M32 | LDST_STORE | LDST_ATTRIB}, + [midgard_op_st_image_16f] = {"ST_IMAGE.f16", M16 | LDST_STORE | LDST_ATTRIB}, + [midgard_op_st_image_32i] = {"ST_IMAGE.s32", M32 | LDST_STORE | LDST_ATTRIB}, + [midgard_op_st_image_32u] = {"ST_IMAGE.u32", M32 | LDST_STORE | LDST_ATTRIB}, + + [midgard_op_st_special_32f] = {"ST_SPECIAL.f32", M32}, + [midgard_op_st_special_16f] = {"ST_SPECIAL.f16", M16}, + [midgard_op_st_special_32u] = {"ST_SPECIAL.u32", M32}, + [midgard_op_st_special_32i] = {"ST_SPECIAL.s32", M32}, + + [midgard_op_st_tilebuffer_32f] = {"ST_TILEBUFFER.f32", M32}, + [midgard_op_st_tilebuffer_16f] = {"ST_TILEBUFFER.f16", M16}, + [midgard_op_st_tilebuffer_raw] = {"ST_TILEBUFFER.raw", M32}, }; #undef M8 diff --git a/src/panfrost/midgard/midgard_ops.h b/src/panfrost/midgard/midgard_ops.h index 58019a7d0c6..c9538165a53 100644 --- a/src/panfrost/midgard/midgard_ops.h +++ b/src/panfrost/midgard/midgard_ops.h @@ -32,6 +32,7 @@ extern struct mir_ldst_op_props load_store_opcode_props[256]; extern struct mir_tag_props midgard_tag_props[16]; #define OP_IS_ATOMIC(op) (load_store_opcode_props[op].props & LDST_ATOMIC) +#define OP_USES_ATTRIB(op) (load_store_opcode_props[op].props & LDST_ATTRIB) #define OP_IS_STORE(op) (load_store_opcode_props[op].props & LDST_STORE) #define OP_HAS_ADDRESS(op) (load_store_opcode_props[op].props & LDST_ADDRESS) diff --git a/src/panfrost/midgard/midgard_opt_perspective.c b/src/panfrost/midgard/midgard_opt_perspective.c
index f2a83576bba..97e9d338519 100644 --- a/src/panfrost/midgard/midgard_opt_perspective.c +++ b/src/panfrost/midgard/midgard_opt_perspective.c @@ -122,8 +122,8 @@ midgard_opt_combine_projection(compiler_context *ctx, midgard_block *block) .src_types = { nir_type_float32 }, .swizzle = SWIZZLE_IDENTITY_4, .op = frcp_component == COMPONENT_W ? - midgard_op_ldst_perspective_division_w : - midgard_op_ldst_perspective_division_z, + midgard_op_ldst_perspective_div_w : + midgard_op_ldst_perspective_div_z, .load_store = { .arg_1 = 0x20 } @@ -175,7 +175,7 @@ midgard_opt_varying_projection(compiler_context *ctx, midgard_block *block) break; bool projects_w = - ins->op == midgard_op_ldst_perspective_division_w; + ins->op == midgard_op_ldst_perspective_div_w; p.modifier = projects_w ? midgard_varying_mod_perspective_w : diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 4ff34238f06..f28e2a6919d 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -986,7 +986,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) .dest_type = ins->src_types[i], .src = { ~0, ~0, ~0, ~0 }, .swizzle = SWIZZLE_IDENTITY_4, - .op = midgard_op_ld_ubo_u128, + .op = midgard_op_ld_ubo_128, .load_store = { .arg_1 = ctx->info->push.words[idx].ubo, .arg_2 = 0x1E,