From 59dc07e02c226f98aa6af3fe0f4e8916e4f864fd Mon Sep 17 00:00:00 2001 From: Simon Perretta Date: Tue, 8 Apr 2025 15:18:16 +0100 Subject: [PATCH] pco: improve image write using pck.prog Signed-off-by: Simon Perretta Acked-by: Erik Faye-Lund Part-of: --- src/compiler/nir/nir_intrinsics.py | 4 + src/imagination/pco/pco_map.py | 39 +++++ src/imagination/pco/pco_nir_tex.c | 251 +++++++++++++--------------- src/imagination/pco/pco_ops.py | 1 + src/imagination/pco/pco_trans_nir.c | 10 ++ 5 files changed, 168 insertions(+), 137 deletions(-) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index d6f90db7e4f..6d9d1660335 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2746,3 +2746,7 @@ intrinsic("flush_tile_buffer_pco", src_comp=[1, 1]) intrinsic("dummy_load_store_pco", indices=[BASE]) intrinsic("alpha_to_coverage_pco", src_comp=[1], dest_comp=1, flags=[CAN_REORDER], bit_sizes=[32]) + +index("bool", "scale") +index("bool", "roundzero") +intrinsic("pck_prog_pco", src_comp=[0, 1], dest_comp=0, flags=[CAN_ELIMINATE, CAN_REORDER], indices=[SCALE, ROUNDZERO], bit_sizes=[32]) diff --git a/src/imagination/pco/pco_map.py b/src/imagination/pco/pco_map.py index 34812d55030..0e498222933 100644 --- a/src/imagination/pco/pco_map.py +++ b/src/imagination/pco/pco_map.py @@ -1113,6 +1113,18 @@ encode_map(O_PCK, op_ref_maps=[('2_pck', ['ft2'], [['is3', '_']])] ) +encode_map(O_PCK_PROG, + encodings=[ + (I_PCK, [ + ('prog', True), + ('rtz', OM_ROUNDZERO), + ('scale', OM_SCALE), + ('pck_format', 0) + ]) + ], + op_ref_maps=[('2_pck', ['ft2'], ['is3', 'fte'])] +) + encode_map(O_UNPCK, encodings=[ (I_UPCK, [ @@ -2033,6 +2045,33 @@ group_map(O_PCK, dests=[('w[0]', ('2_pck', DEST(0)), 'ft2')] ) +group_map(O_PCK_PROG, + hdr=(I_IGRP_HDR_MAIN, [ + ('oporg', 'p0_p2'), + ('olchk', OM_OLCHK), + ('w1p', False), + ('w0p', True), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', OM_RPT) + ]), + enc_ops=[ + ('0', O_MBYP, ['ft0'], [SRC(0)]), + ('2_pck', O_PCK_PROG, [DEST(0)], ['is3', SRC(1)], [(OM_ROUNDZERO, OM_ROUNDZERO), (OM_SCALE, OM_SCALE)]) + ], + srcs=[ + ('s[0]', ('0', SRC(0)), 's0'), + ('s[3]', ('2_pck', SRC(1)), 'fte') + ], + iss=[ + ('is[0]', 's3'), + ('is[3]', 'ft0'), + ('is[4]', 'ft2') + ], + dests=[('w[0]', ('2_pck', DEST(0)), 'ft2')] +) + group_map(O_UNPCK, hdr=(I_IGRP_HDR_MAIN, [ ('oporg', 'p0'), diff --git a/src/imagination/pco/pco_nir_tex.c b/src/imagination/pco/pco_nir_tex.c index 1e003f51dd9..c9aa5ee2835 100644 --- a/src/imagination/pco/pco_nir_tex.c +++ b/src/imagination/pco/pco_nir_tex.c @@ -16,6 +16,7 @@ #include "nir_builtin_builder.h" #include "pco.h" #include "pco_builder.h" +#include "pco_common.h" #include "pco_internal.h" #include "util/macros.h" @@ -890,162 +891,138 @@ static nir_def *lower_image(nir_builder *b, nir_instr *instr, void *cb_data) enum pipe_format data_format = nir_type_to_pipe_format(type, desc->nr_channels); - /* TODO: u/sint need clamping? */ - if (format != data_format) { - nir_def *write_data_comps[4]; - for (unsigned c = 0; c < desc->nr_channels; ++c) { - enum pipe_swizzle chan = desc->swizzle[c]; - nir_def *input = nir_channel(b, write_data, c); + enum pco_pck_format pck_format = ~0; + bool scale = false; + bool roundzero = false; + bool split = false; - switch (format) { - case PIPE_FORMAT_R8_UINT: - case PIPE_FORMAT_R8G8_UINT: - case PIPE_FORMAT_R8G8B8_UINT: - case PIPE_FORMAT_R8G8B8A8_UINT: + switch (format) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + pck_format = PCO_PCK_FORMAT_U8888; + scale = true; + break; - case PIPE_FORMAT_R8_SINT: - case PIPE_FORMAT_R8G8_SINT: - case PIPE_FORMAT_R8G8B8_SINT: - case PIPE_FORMAT_R8G8B8A8_SINT: - write_data_comps[chan] = - nir_bitfield_insert_imm(b, nir_imm_int(b, 0), input, 0, 8); - break; + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + pck_format = PCO_PCK_FORMAT_S8888; + scale = true; + break; - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - write_data_comps[chan] = nir_pack_unorm_8(b, input); - break; + case PIPE_FORMAT_R11G11B10_FLOAT: + pck_format = PCO_PCK_FORMAT_F111110; + break; - case PIPE_FORMAT_R8_SNORM: - case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SNORM: - write_data_comps[chan] = nir_pack_snorm_8(b, input); - break; + case PIPE_FORMAT_R10G10B10A2_UNORM: + pck_format = PCO_PCK_FORMAT_U1010102; + scale = true; + break; - case PIPE_FORMAT_R11G11B10_FLOAT: - switch (chan) { - case PIPE_SWIZZLE_X: - case PIPE_SWIZZLE_Y: - write_data_comps[chan] = nir_pack_float_11(b, input); - break; + case PIPE_FORMAT_R10G10B10A2_SNORM: + pck_format = PCO_PCK_FORMAT_S1010102; + scale = true; + break; - case PIPE_SWIZZLE_Z: - write_data_comps[chan] = nir_pack_float_10(b, input); - break; + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + pck_format = PCO_PCK_FORMAT_F16F16; + split = true; + break; - default: - UNREACHABLE(""); - } - break; + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + pck_format = PCO_PCK_FORMAT_U1616; + scale = true; + split = true; + break; - case PIPE_FORMAT_R10G10B10A2_UINT: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + pck_format = PCO_PCK_FORMAT_S1616; + scale = true; + split = true; + break; - case PIPE_FORMAT_R10G10B10A2_SINT: + case PIPE_FORMAT_R8_UINT: + case PIPE_FORMAT_R8G8_UINT: + case PIPE_FORMAT_R8G8B8_UINT: + case PIPE_FORMAT_R8G8B8A8_UINT: - switch (chan) { - case PIPE_SWIZZLE_X: - case PIPE_SWIZZLE_Y: - case PIPE_SWIZZLE_Z: - write_data_comps[chan] = - nir_bitfield_insert_imm(b, nir_imm_int(b, 0), input, 0, 10); - break; + case PIPE_FORMAT_R8_SINT: + case PIPE_FORMAT_R8G8_SINT: + case PIPE_FORMAT_R8G8B8_SINT: + case PIPE_FORMAT_R8G8B8A8_SINT: - case PIPE_SWIZZLE_W: - write_data_comps[chan] = - nir_bitfield_insert_imm(b, nir_imm_int(b, 0), input, 0, 2); - break; + case PIPE_FORMAT_R10G10B10A2_UINT: + case PIPE_FORMAT_R10G10B10A2_SINT: - default: - UNREACHABLE(""); - } - break; + case PIPE_FORMAT_R16_UINT: + case PIPE_FORMAT_R16G16_UINT: + case PIPE_FORMAT_R16G16B16_UINT: + case PIPE_FORMAT_R16G16B16A16_UINT: - /* TODO: better way to do the 1x2 component. */ - case PIPE_FORMAT_R10G10B10A2_UNORM: - switch (chan) { - case PIPE_SWIZZLE_X: - case PIPE_SWIZZLE_Y: - case PIPE_SWIZZLE_Z: - write_data_comps[chan] = nir_pack_unorm_10(b, input); - break; + case PIPE_FORMAT_R16_SINT: + case PIPE_FORMAT_R16G16_SINT: + case PIPE_FORMAT_R16G16B16_SINT: + case PIPE_FORMAT_R16G16B16A16_SINT: - case PIPE_SWIZZLE_W: - write_data_comps[chan] = - nir_f2i32_rtne(b, - nir_fmul_imm(b, nir_fsat(b, input), 3.0f)); - break; + case PIPE_FORMAT_R32_UINT: + case PIPE_FORMAT_R32G32_UINT: + case PIPE_FORMAT_R32G32B32_UINT: + case PIPE_FORMAT_R32G32B32A32_UINT: - default: - UNREACHABLE(""); - } - break; + case PIPE_FORMAT_R32_SINT: + case PIPE_FORMAT_R32G32_SINT: + case PIPE_FORMAT_R32G32B32_SINT: + case PIPE_FORMAT_R32G32B32A32_SINT: + /* No conversion needed. */ + break; - /* TODO: better way to do the 1x2 component. */ - case PIPE_FORMAT_R10G10B10A2_SNORM: - switch (chan) { - case PIPE_SWIZZLE_X: - case PIPE_SWIZZLE_Y: - case PIPE_SWIZZLE_Z: - write_data_comps[chan] = nir_pack_snorm_10(b, input); - break; - - case PIPE_SWIZZLE_W: - write_data_comps[chan] = - nir_f2i32_rtne(b, nir_fsat_signed(b, input)); - break; - - default: - UNREACHABLE(""); - } - break; - - case PIPE_FORMAT_R16_UINT: - case PIPE_FORMAT_R16G16_UINT: - case PIPE_FORMAT_R16G16B16_UINT: - case PIPE_FORMAT_R16G16B16A16_UINT: - - case PIPE_FORMAT_R16_SINT: - case PIPE_FORMAT_R16G16_SINT: - case PIPE_FORMAT_R16G16B16_SINT: - case PIPE_FORMAT_R16G16B16A16_SINT: - write_data_comps[chan] = - nir_bitfield_insert_imm(b, nir_imm_int(b, 0), input, 0, 16); - break; - - case PIPE_FORMAT_R16_FLOAT: - case PIPE_FORMAT_R16G16_FLOAT: - case PIPE_FORMAT_R16G16B16_FLOAT: - case PIPE_FORMAT_R16G16B16A16_FLOAT: - write_data_comps[chan] = nir_pack_half_16(b, input); - break; - - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16G16_UNORM: - case PIPE_FORMAT_R16G16B16_UNORM: - case PIPE_FORMAT_R16G16B16A16_UNORM: - write_data_comps[chan] = nir_pack_unorm_16(b, input); - break; - - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16G16B16_SNORM: - case PIPE_FORMAT_R16G16B16A16_SNORM: - write_data_comps[chan] = nir_pack_snorm_16(b, input); - break; - - default: - printf("Unsupported image write pack format %s.\n", - util_format_name(format)); - UNREACHABLE(""); - } + default: + printf("Unsupported image write pack format %s.\n", + util_format_name(format)); + UNREACHABLE(""); } - write_data = nir_vec(b, write_data_comps, desc->nr_channels); - write_data = nir_pad_vector(b, write_data, 4); + if (pck_format != ~0) { + if (split) { + nir_def *lower = + nir_pck_prog_pco(b, + nir_channels(b, write_data, 0b0011), + nir_imm_int(b, pck_format), + .scale = scale, + .roundzero = roundzero); + nir_def *upper = + nir_pck_prog_pco(b, + nir_channels(b, write_data, 0b1100), + nir_imm_int(b, pck_format), + .scale = scale, + .roundzero = roundzero); + + write_data = nir_vec4(b, + nir_channel(b, lower, 0), + nir_channel(b, lower, 1), + nir_channel(b, upper, 0), + nir_channel(b, upper, 1)); + } else { + write_data = nir_pck_prog_pco(b, + write_data, + nir_imm_int(b, pck_format), + .scale = scale, + .roundzero = roundzero); + } + } } } diff --git a/src/imagination/pco/pco_ops.py b/src/imagination/pco/pco_ops.py index a2ccbf489fc..eb772a09429 100644 --- a/src/imagination/pco/pco_ops.py +++ b/src/imagination/pco/pco_ops.py @@ -356,6 +356,7 @@ O_FDSXF = hw_op('fdsxf', OM_ALU, 1, 1, [], [[RM_ABS, RM_NEG]]) O_FDSY = hw_op('fdsy', OM_ALU, 1, 1, [], [[RM_ABS, RM_NEG]]) O_FDSYF = hw_op('fdsyf', OM_ALU, 1, 1, [], [[RM_ABS, RM_NEG]]) O_PCK = hw_op('pck', OM_ALU + [OM_PCK_FMT, OM_ROUNDZERO, OM_SCALE], 1, 1) +O_PCK_PROG = hw_op('pck.prog', OM_ALU + [OM_ROUNDZERO, OM_SCALE], 1, 2) O_ADD64_32 = hw_op('add64_32', OM_ALU + [OM_S], 2, 4, [], [[RM_ABS, RM_NEG], [], [RM_ABS, RM_NEG]]) O_IMADD64 = hw_op('imadd64', OM_ALU + [OM_S], 2, 5, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) O_IMADD32 = hw_op('imadd32', OM_ALU + [OM_S], 1, 4, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index b24ad8e2dbf..a70defda5d7 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -1744,6 +1744,16 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr) instr = pco_wop(&tctx->b); break; + case nir_intrinsic_pck_prog_pco: + instr = pco_pck_prog(&tctx->b, + dest, + src[0], + src[1], + .scale = nir_intrinsic_scale(intr), + .roundzero = nir_intrinsic_roundzero(intr), + .rpt = pco_ref_get_chans(dest)); + break; + default: printf("Unsupported intrinsic: \""); nir_print_instr(&intr->instr, stdout);