diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index 8d3ca5cd6a2..ac52414b900 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -90,16 +90,6 @@ constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size) } } -/** - * Evaluate component 3 of packSnorm3x10_1x2. - */ -static uint16_t -pack_snorm_1x2(float x) -{ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 1.0f); -} - /** * Evaluate one component of packSnorm4x8. */ @@ -122,16 +112,6 @@ pack_snorm_1x8(float x) _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f); } -/** - * Evaluate component [0,2] of packSnorm3x10_1x2. - */ -static uint16_t -pack_snorm_1x10(float x) -{ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 511.0f); -} - /** * Evaluate one component of packSnorm2x16. */ @@ -154,16 +134,6 @@ pack_snorm_1x16(float x) _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f); } -/** - * Evaluate component 3 of unpackSnorm3x10_1x2. - */ -static float -unpack_snorm_1x2(uint16_t u) -{ - u &= 0x0003; - return CLAMP((int16_t) u / 1.0f, -1.0f, +1.0f); -} - /** * Evaluate one component of unpackSnorm4x8. */ @@ -182,16 +152,6 @@ unpack_snorm_1x8(uint8_t u) return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); } -/** - * Evaluate component [0,2] of unpackSnorm3x10_1x2. - */ -static float -unpack_snorm_1x10(uint16_t u) -{ - u &= 0x03FF; - return CLAMP((int16_t) u / 511.0f, -1.0f, +1.0f); -} - /** * Evaluate one component of unpackSnorm2x16. */ @@ -210,16 +170,6 @@ unpack_snorm_1x16(uint16_t u) return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); } -/** - * Evaluate component 3 of packUnorm3x10_1x2. - */ -static uint16_t -pack_unorm_1x2(float x) -{ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 3.0f); -} - /** * Evaluate one component packUnorm4x8. */ @@ -239,16 +189,6 @@ pack_unorm_1x8(float x) _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f); } -/** - * Evaluate component [0,2] of packUnorm3x10_1x2. - */ -static uint16_t -pack_unorm_1x10(float x) -{ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 1023.0f); -} - /** * Evaluate one component packUnorm2x16. */ @@ -268,16 +208,6 @@ pack_unorm_1x16(float x) _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f); } -/** - * Evaluate component 3 of unpackUnorm3x10_1x2. - */ -static float -unpack_unorm_1x2(uint16_t u) -{ - u &= 0x0003; - return (float) u / 3.0f; -} - /** * Evaluate one component of unpackUnorm4x8. */ @@ -296,16 +226,6 @@ unpack_unorm_1x8(uint8_t u) return (float) u / 255.0f; } -/** - * Evaluate component [0,2] of unpackUnorm3x10_1x2. - */ -static float -unpack_unorm_1x10(uint16_t u) -{ - u &= 0x03FF; - return (float) u / 1023.0f; -} - /** * Evaluate one component of unpackUnorm2x16. */ @@ -324,95 +244,6 @@ unpack_unorm_1x16(uint16_t u) return (float) u / 65535.0f; } -/** - * Evaluate component 3 of packUscaled3x10_1x2. - */ -static uint16_t -pack_uscaled_1x2(float x) -{ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, 0.0f, 3.0f)); -} - -/** - * Evaluate component [0,2] of packUscaled3x10_1x2. - */ -static uint16_t -pack_uscaled_1x10(float x) -{ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, 0.0f, 1023.0f)); -} - -/** - * Evaluate component 3 of packSscaled3x10_1x2. - */ -static uint16_t -pack_sscaled_1x2(float x) -{ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, -2.0f, +1.0f)); -} - -/** - * Evaluate component [0,2] of packSscaled3x10_1x2. - */ -static uint16_t -pack_sscaled_1x10(float x) -{ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, -512.0f, +511.0f)); -} - -/** - * Evaluate component 3 of unpackSscaled3x10_1x2. - */ -static float -unpack_sscaled_1x2(uint16_t u) -{ - u &= 0x0003; - return CLAMP((int16_t) u, -2.0f, +1.0f); -} - -/** - * Evaluate one component of unpackSscaled4x8. - */ -static float -unpack_sscaled_1x8(uint8_t u) -{ - return CLAMP((int8_t) u, -128.0f, +127.0f); -} - -/** - * Evaluate component [0,2] of unpackSscaled3x10_1x2. - */ -static float -unpack_sscaled_1x10(uint16_t u) -{ - u &= 0x03FF; - return CLAMP((int16_t) u, -512.0f, +511.0f); -} - -/** - * Evaluate component 3 of unpackUscaled3x10_1x2. - */ -static float -unpack_uscaled_1x2(uint16_t u) -{ - u &= 0x0003; - return (float) u; -} - -/** - * Evaluate component [0,2] of unpackUscaled3x10_1x2. - */ -static float -unpack_uscaled_1x10(uint16_t u) -{ - u &= 0x03FF; - return (float) u; -} - /** * Evaluate one component of packHalf2x16. */ diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index c529a95b2a3..975126e8459 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2754,4 +2754,6 @@ intrinsic("alpha_to_coverage_pco", src_comp=[1], dest_comp=1, flags=[CAN_REORDER index("bool", "scale") index("bool", "roundzero") +intrinsic("pack_pco", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER], indices=[FORMAT], bit_sizes=[32]) +intrinsic("unpack_pco", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE, CAN_REORDER], indices=[FORMAT], bit_sizes=[32]) intrinsic("pck_prog_pco", src_comp=[0, 1], dest_comp=0, flags=[CAN_ELIMINATE, CAN_REORDER], indices=[SCALE, ROUNDZERO], bit_sizes=[32]) diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c index 40ebe00c3e6..64d6e469cb9 100644 --- a/src/compiler/nir/nir_lower_alu_width.c +++ b/src/compiler/nir/nir_lower_alu_width.c @@ -278,20 +278,6 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data) */ return NULL; - case nir_op_unpack_snorm_8_8: - case nir_op_unpack_sscaled_8_8: - case nir_op_unpack_unorm_8_8: - case nir_op_unpack_snorm_8_8_8: - case nir_op_unpack_sscaled_8_8_8: - case nir_op_unpack_unorm_8_8_8: - case nir_op_unpack_sscaled_8_8_8_8: - case nir_op_unpack_snorm_10_10_10_2: - case nir_op_unpack_unorm_10_10_10_2: - case nir_op_unpack_sscaled_10_10_10_2: - case nir_op_unpack_uscaled_10_10_10_2: - case nir_op_unpack_float_11_11_10: - return NULL; - case nir_op_unpack_half_2x16: { if (!b->shader->options->lower_unpack_half_2x16) return NULL; diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index ddefc96afe0..a99815da021 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -1810,124 +1810,3 @@ unop_numeric_convert("f2e4m3fn_satfn", tuint8, tfloat32, "isinf(src0) ? 0x7f : _ unop_numeric_convert("e5m22f", tfloat32, tuint8, "_mesa_e5m2_to_float(src0)") unop_numeric_convert("f2e5m2", tuint8, tfloat32, "_mesa_float_to_e5m2(src0)") unop_numeric_convert("f2e5m2_sat", tuint8, tfloat32, "_mesa_float_to_e5m2_sat(src0)") - -def pack_r(fmt, r): - unop_horiz(f"pack_{fmt}_{r}", 1, tuint32, 1, tfloat32, f""" -dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x); -""") - -def pack_rg(fmt, r, g): - unop_horiz(f"pack_{fmt}_{r}_{g}", 1, tuint32, 2, tfloat32, f""" -dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x); -dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r}; -""") - -def pack_rgb(fmt, r, g, b): - unop_horiz(f"pack_{fmt}_{r}_{g}_{b}", 1, tuint32, 3, tfloat32, f""" -dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x); -dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r}; -dst.x |= ((uint32_t) pack_{fmt}_1x{b}(src0.z)) << {r + g}; -""") - -def pack_rgba(fmt, r, g, b, a): - unop_horiz(f"pack_{fmt}_{r}_{g}_{b}_{a}", 1, tuint32, 4, tfloat32, f""" -dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x); -dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r}; -dst.x |= ((uint32_t) pack_{fmt}_1x{b}(src0.z)) << {r + g}; -dst.x |= ((uint32_t) pack_{fmt}_1x{a}(src0.w)) << {r + g + b}; -""") - -pack_r("snorm", 8) -pack_r("unorm", 8) - -pack_rg("snorm", 8, 8) -pack_rg("unorm", 8, 8) - -pack_rgb("snorm", 8, 8, 8) -pack_rgb("unorm", 8, 8, 8) - -pack_r("snorm", 10) -pack_r("unorm", 10) - -pack_r("snorm", 16) -pack_r("unorm", 16) -pack_r("half", 16) - -pack_rgba("snorm", 10, 10, 10, 2) -pack_rgba("unorm", 10, 10, 10, 2) - -pack_rgba("sscaled", 10, 10, 10, 2) -pack_rgba("uscaled", 10, 10, 10, 2) - -unop_horiz(f"pack_float_10", 1, tuint32, 1, tfloat32, f""" -dst.x = f32_to_uf10(src0.x) & 0x3ff; -""") - -unop_horiz(f"pack_float_11", 1, tuint32, 1, tfloat32, f""" -dst.x = f32_to_uf11(src0.x) & 0x7ff; -""") - -unop_horiz(f"pack_float_11_11_10", 1, tuint32, 3, tfloat32, f""" -dst.x = f32_to_uf11(src0.x) & 0x7ff; -dst.x |= (f32_to_uf11(src0.y) & 0x7ff) << 11; -dst.x |= (f32_to_uf10(src0.z) & 0x3ff) << 22; -""") - -def unpack_r(fmt, r): - unop_horiz(f"unpack_{fmt}_{r}", 1, tfloat32, 1, tuint32, f""" -dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1)); -""") - -def unpack_rg(fmt, r, g): - unop_horiz(f"unpack_{fmt}_{r}_{g}", 2, tfloat32, 1, tuint32, f""" -dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1)); -dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1)); -""") - -def unpack_rgb(fmt, r, g, b): - unop_horiz(f"unpack_{fmt}_{r}_{g}_{b}", 3, tfloat32, 1, tuint32, f""" -dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1)); -dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1)); -dst.z = unpack_{fmt}_1x{b}((src0.x >> {r + g}) & ((1u << {b}) - 1)); -""") - -def unpack_rgba(fmt, r, g, b, a): - unop_horiz(f"unpack_{fmt}_{r}_{g}_{b}_{a}", 4, tfloat32, 1, tuint32, f""" -dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1)); -dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1)); -dst.z = unpack_{fmt}_1x{b}((src0.x >> {r + g}) & ((1u << {b}) - 1)); -dst.w = unpack_{fmt}_1x{a}((src0.x >> {r + g + b}) & ((1u << {a}) - 1)); -""") - -unpack_r("snorm", 8) -unpack_r("sscaled", 8) -unpack_r("unorm", 8) - -unpack_rg("snorm", 8, 8) -unpack_rg("sscaled", 8, 8) -unpack_rg("unorm", 8, 8) - -unpack_rgb("snorm", 8, 8, 8) -unpack_rgb("sscaled", 8, 8, 8) -unpack_rgb("unorm", 8, 8, 8) - -unpack_rgba("sscaled", 8, 8, 8, 8) - -unpack_r("snorm", 16) -unpack_r("unorm", 16) - -unop_horiz(f"unpack_half_16", 1, tfloat32, 1, tuint32, """ -dst.x = unpack_half_1x16(src0.x & ((1u << 16) - 1), nir_is_denorm_flush_to_zero(execution_mode, 16)); -""") - -unpack_rgba("snorm", 10, 10, 10, 2) -unpack_rgba("unorm", 10, 10, 10, 2) - -unpack_rgba("sscaled", 10, 10, 10, 2) -unpack_rgba("uscaled", 10, 10, 10, 2) - -unop_horiz(f"unpack_float_11_11_10", 3, tfloat32, 1, tuint32, f""" -dst.x = uf11_to_f32(src0.x & 0x7ff); -dst.y = uf11_to_f32((src0.x >> 11) & 0x7ff); -dst.z = uf10_to_f32((src0.x >> 22) & 0x3ff); -""") diff --git a/src/imagination/pco/pco_nir_pvfio.c b/src/imagination/pco/pco_nir_pvfio.c index f3a2869c6e1..6e6336ed846 100644 --- a/src/imagination/pco/pco_nir_pvfio.c +++ b/src/imagination/pco/pco_nir_pvfio.c @@ -118,6 +118,30 @@ to_pbe_format(nir_builder *b, enum pipe_format format, nir_def **input) return format; } +static unsigned format_chans_per_dword(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned accum_bits = 0; + + for (unsigned u = 0; u < desc->nr_channels; ++u) { + /* Exactly one dword, great! */ + if (accum_bits == 32) + return u; + + /* Went over, back off by one. */ + if (accum_bits > 32) { + /* We don't support formats with channels > 1 dword. */ + assert(u > 1); + return u - 1; + } + + accum_bits += desc->channel[u].size; + } + + /* Loop finished, all channels can fit. */ + return desc->nr_channels; +} + static nir_def *pack_to_format(nir_builder *b, nir_def *input, nir_alu_type src_type, @@ -125,628 +149,153 @@ static nir_def *pack_to_format(nir_builder *b, { const struct util_format_description *desc = util_format_description(format); - src_type = nir_alu_type_get_base_type(src_type); + nir_def *zero = nir_imm_int(b, 0); + nir_def *one = nir_alu_type_get_base_type(src_type) == nir_type_float + ? nir_imm_float(b, 1.0f) + : nir_imm_int(b, 1); nir_def *input_comps[4]; - for (unsigned u = 0; u < desc->nr_channels; ++u) { + /* Populate any missing components .*/ + for (unsigned u = 0; u < ARRAY_SIZE(input_comps); ++u) { enum pipe_swizzle s = desc->swizzle[u]; - if (s <= PIPE_SWIZZLE_W) { + + if (s <= PIPE_SWIZZLE_W) input_comps[u] = nir_channel(b, input, s); - } else if (s == PIPE_SWIZZLE_0) { - input_comps[u] = nir_imm_int(b, 0); - } else if (s == PIPE_SWIZZLE_1) { - input_comps[u] = src_type == nir_type_float ? nir_imm_float(b, 1.0f) - : nir_imm_int(b, 1); - } else { + else if (s == PIPE_SWIZZLE_0) + input_comps[u] = zero; + else if (s == PIPE_SWIZZLE_1) + input_comps[u] = one; + else UNREACHABLE(""); + } + + unsigned format_bits = util_format_get_blocksizebits(format); + unsigned format_dwords = DIV_ROUND_UP(format_bits, 32); + nir_def *packed_comps[] = { zero, zero, zero, zero }; + + /* Special case: no packing required. */ + if (util_format_get_max_channel_size(format) == 32) + return nir_vec(b, input_comps, format_dwords); + + /* Special case: can't be packed with op, need bit-packing instead. */ + if (util_format_is_pure_integer(format)) { + for (unsigned u = 0; u < desc->nr_channels; ++u) { + unsigned dword = desc->channel[u].shift / 32; + unsigned offset = desc->channel[u].shift % 32; + unsigned size = desc->channel[u].size; + + packed_comps[dword] = nir_bitfield_insert_imm(b, + packed_comps[dword], + input_comps[u], + offset, + size); } + + return nir_vec(b, packed_comps, format_dwords); } + unsigned chans_per_dword = format_chans_per_dword(format); + unsigned chans_remaining = desc->nr_channels; input = nir_vec(b, input_comps, desc->nr_channels); + for (unsigned u = 0; u < format_dwords; ++u) { + unsigned chans_to_pack = + chans_remaining > chans_per_dword ? chans_per_dword : chans_remaining; + unsigned chans_packed = desc->nr_channels - chans_remaining; - nir_def *zero = nir_imm_int(b, 0); - nir_def *packed[4] = { zero, zero, zero, zero }; - switch (format) { - case PIPE_FORMAT_R8_UNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_unorm_8(b, input); - break; + nir_def *input_chans = + nir_channels(b, input, BITFIELD_RANGE(chans_packed, chans_to_pack)); + packed_comps[u] = nir_pack_pco(b, input_chans, .format = format); - case PIPE_FORMAT_R8G8_UNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_unorm_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8_UNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_unorm_8_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8A8_UNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_unorm_4x8(b, input); - break; - - case PIPE_FORMAT_R8_SNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_snorm_8(b, input); - break; - - case PIPE_FORMAT_R8G8_SNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_snorm_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8_SNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_snorm_8_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8A8_SNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_snorm_4x8(b, input); - break; - - case PIPE_FORMAT_R8G8B8A8_UINT: - case PIPE_FORMAT_R8G8B8A8_SINT: - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[3], 24, 8); - FALLTHROUGH; - - case PIPE_FORMAT_R8G8B8_UINT: - case PIPE_FORMAT_R8G8B8_SINT: - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[2], 16, 8); - FALLTHROUGH; - - case PIPE_FORMAT_R8G8_UINT: - case PIPE_FORMAT_R8G8_SINT: - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[1], 8, 8); - FALLTHROUGH; - - case PIPE_FORMAT_R8_UINT: - case PIPE_FORMAT_R8_SINT: - assert(src_type != nir_type_float); - /* TODO: sat/clamp? */ - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[0], 0, 8); - break; - - case PIPE_FORMAT_R10G10B10A2_UINT: - assert(src_type == nir_type_uint); - /* TODO: sat/clamp? */ - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[0], 0, 10); - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[1], 10, 10); - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[2], 20, 10); - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[3], 30, 2); - break; - - case PIPE_FORMAT_R11G11B10_FLOAT: - assert(src_type == nir_type_float); - packed[0] = nir_pack_float_11_11_10(b, input_comps[0]); - break; - - case PIPE_FORMAT_R16_UNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_unorm_16(b, input_comps[0]); - break; - - case PIPE_FORMAT_R16G16_UNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_unorm_2x16(b, input); - break; - - case PIPE_FORMAT_R16G16B16_UNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_unorm_2x16(b, nir_channels(b, input, 0b0011)); - packed[1] = nir_pack_unorm_16(b, input_comps[2]); - break; - - case PIPE_FORMAT_R16G16B16A16_UNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_unorm_2x16(b, nir_channels(b, input, 0b0011)); - packed[1] = nir_pack_unorm_2x16(b, nir_channels(b, input, 0b1100)); - break; - - case PIPE_FORMAT_R16_SNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_snorm_16(b, input_comps[0]); - break; - - case PIPE_FORMAT_R16G16_SNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_snorm_2x16(b, input); - break; - - case PIPE_FORMAT_R16G16B16_SNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_snorm_2x16(b, nir_channels(b, input, 0b0011)); - packed[1] = nir_pack_snorm_16(b, input_comps[2]); - break; - - case PIPE_FORMAT_R16G16B16A16_SNORM: - assert(src_type == nir_type_float); - packed[0] = nir_pack_snorm_2x16(b, nir_channels(b, input, 0b0011)); - packed[1] = nir_pack_snorm_2x16(b, nir_channels(b, input, 0b1100)); - break; - - case PIPE_FORMAT_R16G16B16A16_UINT: - case PIPE_FORMAT_R16G16B16A16_SINT: - packed[1] = nir_bitfield_insert_imm(b, packed[1], input_comps[3], 16, 16); - FALLTHROUGH; - - case PIPE_FORMAT_R16G16B16_UINT: - case PIPE_FORMAT_R16G16B16_SINT: - packed[1] = nir_bitfield_insert_imm(b, packed[1], input_comps[2], 0, 16); - FALLTHROUGH; - - case PIPE_FORMAT_R16G16_UINT: - case PIPE_FORMAT_R16G16_SINT: - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[1], 16, 16); - FALLTHROUGH; - - case PIPE_FORMAT_R16_UINT: - case PIPE_FORMAT_R16_SINT: - assert(src_type != nir_type_float); - /* TODO: sat/clamp? */ - packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[0], 0, 16); - break; - - case PIPE_FORMAT_R16_FLOAT: - assert(src_type == nir_type_float); - packed[0] = nir_pack_half_16(b, input); - break; - - case PIPE_FORMAT_R16G16_FLOAT: - assert(src_type == nir_type_float); - packed[0] = nir_pack_half_2x16(b, input); - break; - - case PIPE_FORMAT_R16G16B16_FLOAT: - assert(src_type == nir_type_float); - packed[0] = nir_pack_half_2x16(b, nir_channels(b, input, 0b0011)); - packed[1] = nir_pack_half_16(b, input_comps[2]); - break; - - case PIPE_FORMAT_R16G16B16A16_FLOAT: - assert(src_type == nir_type_float); - packed[0] = nir_pack_half_2x16(b, nir_channels(b, input, 0b0011)); - packed[1] = nir_pack_half_2x16(b, nir_channels(b, input, 0b1100)); - break; - - case PIPE_FORMAT_R32G32B32A32_UINT: - case PIPE_FORMAT_R32G32B32A32_SINT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - packed[3] = input_comps[3]; - FALLTHROUGH; - - case PIPE_FORMAT_R32G32B32_UINT: - case PIPE_FORMAT_R32G32B32_SINT: - case PIPE_FORMAT_R32G32B32_FLOAT: - packed[2] = input_comps[2]; - FALLTHROUGH; - - case PIPE_FORMAT_R32G32_UINT: - case PIPE_FORMAT_R32G32_SINT: - case PIPE_FORMAT_R32G32_FLOAT: - packed[1] = input_comps[1]; - FALLTHROUGH; - - case PIPE_FORMAT_R32_UINT: - case PIPE_FORMAT_R32_SINT: - case PIPE_FORMAT_R32_FLOAT: - packed[0] = input_comps[0]; - break; - - default: - printf("Unsupported pack format %s.\n", util_format_name(format)); - UNREACHABLE(""); + chans_remaining -= chans_to_pack; } - unsigned packed_comps = 1; - - if (packed[3] != zero) - packed_comps = 4; - else if (packed[2] != zero) - packed_comps = 3; - else if (packed[1] != zero) - packed_comps = 2; - - assert(packed[0] != zero); - - return nir_vec(b, packed, packed_comps); + assert(!chans_remaining); + return nir_vec(b, packed_comps, format_dwords); } static nir_def *unpack_from_format(nir_builder *b, - nir_def *input, + nir_def *packed_comps[static 4], nir_alu_type dest_type, - enum pipe_format format) + enum pipe_format format, + unsigned components_needed) { const struct util_format_description *desc = util_format_description(format); - dest_type = nir_alu_type_get_base_type(dest_type); + nir_def *unpacked_comps[4]; - nir_def *input_comps[4] = { - nir_channel(b, input, 0), - nir_channel(b, input, 1), - nir_channel(b, input, 2), - nir_channel(b, input, 3), - }; + unsigned format_bits = util_format_get_blocksizebits(format); + unsigned format_dwords = DIV_ROUND_UP(format_bits, 32); - nir_def *unpacked = nir_undef(b, 4, 32); - switch (format) { - case PIPE_FORMAT_R8_UNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_unorm_8(b, input); - break; - - case PIPE_FORMAT_R8G8_UNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_unorm_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8_UNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_unorm_8_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_unorm_4x8(b, input); - break; - - case PIPE_FORMAT_R8_SNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_snorm_8(b, input); - break; - - case PIPE_FORMAT_R8G8_SNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_snorm_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8_SNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_snorm_8_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8A8_SNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_snorm_4x8(b, input); - break; - - case PIPE_FORMAT_R8_SSCALED: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_sscaled_8(b, input); - break; - - case PIPE_FORMAT_R8G8_SSCALED: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_sscaled_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8_SSCALED: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_sscaled_8_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8A8_SSCALED: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_sscaled_8_8_8_8(b, input); - break; - - case PIPE_FORMAT_R8G8B8A8_UINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ubitfield_extract_imm(b, input_comps[0], 24, 8), - 3); - FALLTHROUGH; - - case PIPE_FORMAT_R8G8B8_UINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ubitfield_extract_imm(b, input_comps[0], 16, 8), - 2); - FALLTHROUGH; - - case PIPE_FORMAT_R8G8_UINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ubitfield_extract_imm(b, input_comps[0], 8, 8), - 1); - FALLTHROUGH; - - case PIPE_FORMAT_R8_UINT: - assert(dest_type == nir_type_uint); - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ubitfield_extract_imm(b, input_comps[0], 0, 8), - 0); - break; - - case PIPE_FORMAT_R8G8B8A8_SINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ibitfield_extract_imm(b, input_comps[0], 24, 8), - 3); - FALLTHROUGH; - - case PIPE_FORMAT_R8G8B8_SINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ibitfield_extract_imm(b, input_comps[0], 16, 8), - 2); - FALLTHROUGH; - - case PIPE_FORMAT_R8G8_SINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ibitfield_extract_imm(b, input_comps[0], 8, 8), - 1); - FALLTHROUGH; - - case PIPE_FORMAT_R8_SINT: - assert(dest_type == nir_type_int); - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ibitfield_extract_imm(b, input_comps[0], 0, 8), - 0); - break; - - case PIPE_FORMAT_R10G10B10A2_UNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_unorm_10_10_10_2(b, input_comps[0]); - break; - - case PIPE_FORMAT_R10G10B10A2_SNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_snorm_10_10_10_2(b, input_comps[0]); - break; - - case PIPE_FORMAT_R10G10B10A2_UINT: - assert(dest_type == nir_type_uint); - unpacked = nir_vec4(b, - nir_ubitfield_extract_imm(b, input_comps[0], 0, 10), - nir_ubitfield_extract_imm(b, input_comps[0], 10, 10), - nir_ubitfield_extract_imm(b, input_comps[0], 20, 10), - nir_ubitfield_extract_imm(b, input_comps[0], 30, 2)); - break; - - case PIPE_FORMAT_R10G10B10A2_USCALED: - case PIPE_FORMAT_B10G10R10A2_USCALED: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_uscaled_10_10_10_2(b, input_comps[0]); - break; - - case PIPE_FORMAT_R10G10B10A2_SSCALED: - case PIPE_FORMAT_B10G10R10A2_SSCALED: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_sscaled_10_10_10_2(b, input_comps[0]); - break; - - case PIPE_FORMAT_R11G11B10_FLOAT: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_float_11_11_10(b, input_comps[0]); - break; - - case PIPE_FORMAT_R16_UNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_unorm_16(b, input_comps[0]); - break; - - case PIPE_FORMAT_R16G16_UNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_unorm_2x16(b, input_comps[0]); - break; - - case PIPE_FORMAT_R16G16B16_UNORM: { - assert(dest_type == nir_type_float); - nir_def *lo2 = nir_unpack_unorm_2x16(b, input_comps[0]); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1); - unpacked = nir_vector_insert_imm(b, - unpacked, - nir_unpack_unorm_16(b, input_comps[1]), - 2); - break; + /* Special case: no unpacking required. */ + if (util_format_get_max_channel_size(format) == 32) { + for (unsigned u = 0; u < desc->nr_channels; ++u) + unpacked_comps[u] = packed_comps[u]; } - case PIPE_FORMAT_R16G16B16A16_UNORM: - assert(dest_type == nir_type_float); - nir_def *lo2 = nir_unpack_unorm_2x16(b, input_comps[0]); - nir_def *hi2 = nir_unpack_unorm_2x16(b, input_comps[1]); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 0), 2); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 1), 3); - break; + /* Special case: can't be unpacked with op, need bit-unpacking instead. */ + else if (util_format_is_pure_integer(format)) { + nir_def *(*nir_bitfield_extract_imm)(nir_builder *, + nir_def *, + uint32_t, + uint32_t) = + util_format_is_pure_uint(format) ? nir_ubitfield_extract_imm + : nir_ibitfield_extract_imm; - case PIPE_FORMAT_R16_SNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_snorm_16(b, input_comps[0]); - break; + for (unsigned u = 0; u < desc->nr_channels; ++u) { + unsigned dword = desc->channel[u].shift / 32; + unsigned offset = desc->channel[u].shift % 32; + unsigned size = desc->channel[u].size; - case PIPE_FORMAT_R16G16_SNORM: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_snorm_2x16(b, input_comps[0]); - break; - - case PIPE_FORMAT_R16G16B16_SNORM: { - assert(dest_type == nir_type_float); - nir_def *lo2 = nir_unpack_snorm_2x16(b, input_comps[0]); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1); - unpacked = nir_vector_insert_imm(b, - unpacked, - nir_unpack_snorm_16(b, input_comps[1]), - 2); - break; - } - - case PIPE_FORMAT_R16G16B16A16_SNORM: { - assert(dest_type == nir_type_float); - nir_def *lo2 = nir_unpack_snorm_2x16(b, input_comps[0]); - nir_def *hi2 = nir_unpack_snorm_2x16(b, input_comps[1]); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 0), 2); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 1), 3); - break; - } - - case PIPE_FORMAT_R16G16B16A16_UINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ubitfield_extract_imm(b, input_comps[1], 16, 16), - 3); - FALLTHROUGH; - - case PIPE_FORMAT_R16G16B16_UINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ubitfield_extract_imm(b, input_comps[1], 0, 16), - 2); - FALLTHROUGH; - - case PIPE_FORMAT_R16G16_UINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ubitfield_extract_imm(b, input_comps[0], 16, 16), - 1); - FALLTHROUGH; - - case PIPE_FORMAT_R16_UINT: - assert(dest_type == nir_type_uint); - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ubitfield_extract_imm(b, input_comps[0], 0, 16), - 0); - break; - - case PIPE_FORMAT_R16G16B16A16_SINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ibitfield_extract_imm(b, input_comps[1], 16, 16), - 3); - FALLTHROUGH; - - case PIPE_FORMAT_R16G16B16_SINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ibitfield_extract_imm(b, input_comps[1], 0, 16), - 2); - FALLTHROUGH; - - case PIPE_FORMAT_R16G16_SINT: - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ibitfield_extract_imm(b, input_comps[0], 16, 16), - 1); - FALLTHROUGH; - - case PIPE_FORMAT_R16_SINT: - assert(dest_type == nir_type_int); - unpacked = nir_vector_insert_imm( - b, - unpacked, - nir_ibitfield_extract_imm(b, input_comps[0], 0, 16), - 0); - break; - - case PIPE_FORMAT_R16_FLOAT: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_half_16(b, input_comps[0]); - break; - - case PIPE_FORMAT_R16G16_FLOAT: - assert(dest_type == nir_type_float); - unpacked = nir_unpack_half_2x16(b, input_comps[0]); - break; - - case PIPE_FORMAT_R16G16B16_FLOAT: { - assert(dest_type == nir_type_float); - nir_def *lo2 = nir_unpack_half_2x16(b, input_comps[0]); - - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1); - unpacked = nir_vector_insert_imm(b, - unpacked, - nir_unpack_half_16(b, input_comps[1]), - 2); - break; - } - - case PIPE_FORMAT_R16G16B16A16_FLOAT: { - assert(dest_type == nir_type_float); - nir_def *lo2 = nir_unpack_half_2x16(b, input_comps[0]); - nir_def *hi2 = nir_unpack_half_2x16(b, input_comps[1]); - - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 0), 2); - unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 1), 3); - - break; - } - - case PIPE_FORMAT_R32G32B32A32_UINT: - case PIPE_FORMAT_R32G32B32A32_SINT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - unpacked = nir_vector_insert_imm(b, unpacked, input_comps[3], 3); - FALLTHROUGH; - - case PIPE_FORMAT_R32G32B32_UINT: - case PIPE_FORMAT_R32G32B32_SINT: - case PIPE_FORMAT_R32G32B32_FLOAT: - unpacked = nir_vector_insert_imm(b, unpacked, input_comps[2], 2); - FALLTHROUGH; - - case PIPE_FORMAT_R32G32_UINT: - case PIPE_FORMAT_R32G32_SINT: - case PIPE_FORMAT_R32G32_FLOAT: - unpacked = nir_vector_insert_imm(b, unpacked, input_comps[1], 1); - FALLTHROUGH; - - case PIPE_FORMAT_R32_UINT: - case PIPE_FORMAT_R32_SINT: - case PIPE_FORMAT_R32_FLOAT: - unpacked = nir_vector_insert_imm(b, unpacked, input_comps[0], 0); - break; - - default: - printf("Unsupported unpack format %s.\n", util_format_name(format)); - UNREACHABLE(""); - } - - nir_def *output_comps[4]; - for (unsigned u = 0; u < ARRAY_SIZE(output_comps); ++u) { - enum pipe_swizzle s = desc->swizzle[u]; - if (s <= PIPE_SWIZZLE_W) { - output_comps[u] = nir_channel(b, unpacked, s); - } else if (s == PIPE_SWIZZLE_0) { - output_comps[u] = nir_imm_int(b, 0); - } else if (s == PIPE_SWIZZLE_1) { - output_comps[u] = dest_type == nir_type_float ? nir_imm_float(b, 1.0f) - : nir_imm_int(b, 1); - } else { - UNREACHABLE(""); + unpacked_comps[u] = + nir_bitfield_extract_imm(b, packed_comps[dword], offset, size); } } - return nir_vec(b, output_comps, ARRAY_SIZE(output_comps)); + else { + unsigned chans_per_dword = format_chans_per_dword(format); + unsigned chans_remaining = desc->nr_channels; + + for (unsigned u = 0; u < format_dwords; ++u) { + unsigned chans_to_unpack = chans_remaining > chans_per_dword + ? chans_per_dword + : chans_remaining; + + nir_def *unpacked = nir_unpack_pco(b, + chans_to_unpack, + packed_comps[u], + .format = format); + + unsigned chans_unpacked = desc->nr_channels - chans_remaining; + for (unsigned v = 0; v < chans_to_unpack; ++v) + unpacked_comps[chans_unpacked + v] = nir_channel(b, unpacked, v); + + chans_remaining -= chans_to_unpack; + } + + assert(!chans_remaining); + } + + nir_def *zero = nir_imm_int(b, 0); + nir_def *one = nir_alu_type_get_base_type(dest_type) == nir_type_float + ? nir_imm_float(b, 1.0f) + : nir_imm_int(b, 1); + + nir_def *output_comps[4]; + /* Populate any missing components .*/ + for (unsigned u = 0; u < ARRAY_SIZE(output_comps); ++u) { + enum pipe_swizzle s = desc->swizzle[u]; + + if (s <= PIPE_SWIZZLE_W) + output_comps[u] = unpacked_comps[s]; + else if (s == PIPE_SWIZZLE_0) + output_comps[u] = zero; + else if (s == PIPE_SWIZZLE_1) + output_comps[u] = one; + else + UNREACHABLE(""); + } + + return nir_vec(b, output_comps, components_needed); } static inline bool is_processed(nir_intrinsic_instr *intr) @@ -854,30 +403,29 @@ static nir_def *lower_pfo_load(nir_builder *b, format = to_pbe_format(b, format, NULL); - nir_def *input_comps[4]; - for (unsigned c = 0; c < ARRAY_SIZE(input_comps); ++c) { - input_comps[c] = nir_load_output(b, - 1, - 32, - offset->ssa, - .base = base, - .component = c, - .dest_type = nir_type_invalid | 32, - .io_semantics = io_semantics); + nir_def *packed_comps[4]; + for (unsigned c = 0; c < ARRAY_SIZE(packed_comps); ++c) { + packed_comps[c] = nir_load_output(b, + 1, + 32, + offset->ssa, + .base = base, + .component = c, + .dest_type = nir_type_invalid | 32, + .io_semantics = io_semantics); nir_intrinsic_instr *load = - nir_instr_as_intrinsic(input_comps[c]->parent_instr); + nir_instr_as_intrinsic(packed_comps[c]->parent_instr); util_dynarray_append(&state->loads, nir_intrinsic_instr *, load); } - nir_def *input = nir_vec(b, input_comps, ARRAY_SIZE(input_comps)); nir_alu_type dest_type = nir_intrinsic_dest_type(intr); - nir_def *output = unpack_from_format(b, input, dest_type, format); - if (output->num_components > intr->def.num_components) - output = nir_trim_vector(b, output, intr->def.num_components); - - return output; + return unpack_from_format(b, + packed_comps, + dest_type, + format, + intr->def.num_components); } /** @@ -1412,23 +960,23 @@ bool pco_nir_pvi(nir_shader *shader, pco_vs_data *vs) DIV_ROUND_UP(util_format_get_blocksize(format), sizeof(uint32_t)); var->type = glsl_uvec_type(format_dwords); - nir_def *input_comps[4]; - for (unsigned c = 0; c < ARRAY_SIZE(input_comps); ++c) { - input_comps[c] = nir_load_input(&b, - 1, - 32, - nir_imm_int(&b, 0), - .range = 1, - .component = c, - .dest_type = nir_type_invalid | 32, - .io_semantics = (nir_io_semantics){ - .location = location, - .num_slots = 1, - }); + nir_def *packed_comps[4]; + for (unsigned c = 0; c < ARRAY_SIZE(packed_comps); ++c) { + packed_comps[c] = nir_load_input(&b, + 1, + 32, + nir_imm_int(&b, 0), + .range = 1, + .component = c, + .dest_type = nir_type_invalid | 32, + .io_semantics = (nir_io_semantics){ + .location = location, + .num_slots = 1, + }); } - nir_def *input = nir_vec(&b, input_comps, ARRAY_SIZE(input_comps)); - state.attribs[u] = unpack_from_format(&b, input, base_type, format); + state.attribs[u] = + unpack_from_format(&b, packed_comps, base_type, format, 4); } nir_shader_lower_instructions(shader, is_pvi, lower_pvi, &state); diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index 75af2376b1b..e1b4967ae68 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -1574,6 +1574,49 @@ static pco_instr *trans_reg_intr(trans_ctx *tctx, UNREACHABLE(""); } +static enum pco_pck_fmt pco_pck_format_from_pipe_format(enum pipe_format fmt) +{ + const struct util_format_description *desc = util_format_description(fmt); + int c = util_format_get_largest_non_void_channel(fmt); + assert(c >= 0); + const struct util_format_channel_description *chan = &desc->channel[c]; + + switch (chan->size) { + case 8: + if (chan->type == UTIL_FORMAT_TYPE_UNSIGNED) + return PCO_PCK_FMT_U8888; + else if (chan->type == UTIL_FORMAT_TYPE_SIGNED) + return PCO_PCK_FMT_S8888; + break; + + case 10: + if (chan->type == UTIL_FORMAT_TYPE_UNSIGNED) + return PCO_PCK_FMT_U1010102; + else if (chan->type == UTIL_FORMAT_TYPE_SIGNED) + return PCO_PCK_FMT_S1010102; + break; + + case 11: + if (chan->type == UTIL_FORMAT_TYPE_FLOAT) + return PCO_PCK_FMT_F111110; + break; + + case 16: + if (chan->type == UTIL_FORMAT_TYPE_UNSIGNED) + return PCO_PCK_FMT_U1616; + else if (chan->type == UTIL_FORMAT_TYPE_SIGNED) + return PCO_PCK_FMT_S1616; + else if (chan->type == UTIL_FORMAT_TYPE_FLOAT) + return PCO_PCK_FMT_F16F16; + break; + + default: + break; + } + + UNREACHABLE("Unsupported format."); +} + /** * \brief Translates a NIR intrinsic instruction into PCO. * @@ -1885,6 +1928,34 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr) .rpt = pco_ref_get_chans(dest)); break; + case nir_intrinsic_pack_pco: { + enum pipe_format fmt = nir_intrinsic_format(intr); + enum pco_pck_fmt pck_fmt = pco_pck_format_from_pipe_format(fmt); + bool scale = util_format_is_unorm(fmt) || util_format_is_snorm(fmt); + unsigned chans = pco_ref_get_chans(src[0]); + instr = pco_pck(&tctx->b, + dest, + src[0], + .rpt = chans, + .pck_fmt = pck_fmt, + .scale = scale); + break; + } + + case nir_intrinsic_unpack_pco: { + enum pipe_format fmt = nir_intrinsic_format(intr); + enum pco_pck_fmt pck_fmt = pco_pck_format_from_pipe_format(fmt); + unsigned chans = pco_ref_get_chans(dest); + bool scale = util_format_is_unorm(fmt) || util_format_is_snorm(fmt); + instr = pco_unpck(&tctx->b, + dest, + pco_ref_elem(src[0], 0), + .rpt = chans, + .pck_fmt = pck_fmt, + .scale = scale); + break; + } + default: printf("Unsupported intrinsic: \""); nir_print_instr(&intr->instr, stdout); @@ -2766,14 +2837,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) instr = trans_min_max(tctx, alu->op, dest, src[0], src[1]); break; - case nir_op_pack_half_16: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_F16F16); - break; - case nir_op_pack_half_2x16: instr = pco_pck(&tctx->b, dest, @@ -2782,14 +2845,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) .pck_fmt = PCO_PCK_FMT_F16F16); break; - case nir_op_unpack_half_16: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_F16F16); - break; - case nir_op_unpack_half_2x16: instr = pco_unpck(&tctx->b, dest, @@ -2798,33 +2853,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) .pck_fmt = PCO_PCK_FMT_F16F16); break; - case nir_op_pack_snorm_8: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_S8888, - .scale = true); - break; - - case nir_op_pack_snorm_8_8: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 2, - .pck_fmt = PCO_PCK_FMT_S8888, - .scale = true); - break; - - case nir_op_pack_snorm_8_8_8: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 3, - .pck_fmt = PCO_PCK_FMT_S8888, - .scale = true); - break; - case nir_op_pack_snorm_4x8: instr = pco_pck(&tctx->b, dest, @@ -2834,33 +2862,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) .scale = true); break; - case nir_op_unpack_snorm_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_S8888, - .scale = true); - break; - - case nir_op_unpack_snorm_8_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 2, - .pck_fmt = PCO_PCK_FMT_S8888, - .scale = true); - break; - - case nir_op_unpack_snorm_8_8_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 3, - .pck_fmt = PCO_PCK_FMT_S8888, - .scale = true); - break; - case nir_op_unpack_snorm_4x8: instr = pco_unpck(&tctx->b, dest, @@ -2870,65 +2871,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) .scale = true); break; - case nir_op_unpack_sscaled_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_S8888); - break; - - case nir_op_unpack_sscaled_8_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 2, - .pck_fmt = PCO_PCK_FMT_S8888); - break; - - case nir_op_unpack_sscaled_8_8_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 3, - .pck_fmt = PCO_PCK_FMT_S8888); - break; - - case nir_op_unpack_sscaled_8_8_8_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 4, - .pck_fmt = PCO_PCK_FMT_S8888); - break; - - case nir_op_pack_unorm_8: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_U8888, - .scale = true); - break; - - case nir_op_pack_unorm_8_8: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 2, - .pck_fmt = PCO_PCK_FMT_U8888, - .scale = true); - break; - - case nir_op_pack_unorm_8_8_8: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 3, - .pck_fmt = PCO_PCK_FMT_U8888, - .scale = true); - break; - case nir_op_pack_unorm_4x8: instr = pco_pck(&tctx->b, dest, @@ -2938,33 +2880,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) .scale = true); break; - case nir_op_unpack_unorm_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_U8888, - .scale = true); - break; - - case nir_op_unpack_unorm_8_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 2, - .pck_fmt = PCO_PCK_FMT_U8888, - .scale = true); - break; - - case nir_op_unpack_unorm_8_8_8: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 3, - .pck_fmt = PCO_PCK_FMT_U8888, - .scale = true); - break; - case nir_op_unpack_unorm_4x8: instr = pco_unpck(&tctx->b, dest, @@ -2974,137 +2889,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) .scale = true); break; - case nir_op_pack_unorm_10_10_10_2: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 4, - .pck_fmt = PCO_PCK_FORMAT_U1010102, - .scale = true); - break; - - case nir_op_unpack_unorm_10_10_10_2: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 4, - .pck_fmt = PCO_PCK_FMT_U1010102, - .scale = true); - break; - - case nir_op_pack_snorm_10_10_10_2: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 4, - .pck_fmt = PCO_PCK_FORMAT_S1010102, - .scale = true); - break; - - case nir_op_unpack_snorm_10_10_10_2: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 4, - .pck_fmt = PCO_PCK_FMT_S1010102, - .scale = true); - break; - - case nir_op_pack_uscaled_10_10_10_2: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 4, - .pck_fmt = PCO_PCK_FORMAT_U1010102, - .scale = false); - break; - - case nir_op_unpack_uscaled_10_10_10_2: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 4, - .pck_fmt = PCO_PCK_FMT_U1010102, - .scale = false); - break; - - case nir_op_pack_sscaled_10_10_10_2: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 4, - .pck_fmt = PCO_PCK_FORMAT_S1010102, - .scale = false); - break; - - case nir_op_unpack_sscaled_10_10_10_2: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 4, - .pck_fmt = PCO_PCK_FMT_S1010102, - .scale = false); - break; - - case nir_op_pack_unorm_10: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_U1010102, - .scale = true); - break; - - case nir_op_pack_snorm_10: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_S1010102, - .scale = true); - break; - - case nir_op_pack_float_10: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_2F10F10F10); - break; - - case nir_op_pack_float_11: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_F111110); - break; - - case nir_op_pack_float_11_11_10: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 3, - .pck_fmt = PCO_PCK_FMT_F111110); - break; - - case nir_op_unpack_float_11_11_10: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 3, - .pck_fmt = PCO_PCK_FMT_F111110); - break; - - case nir_op_pack_snorm_16: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_S1616, - .scale = true); - break; - case nir_op_pack_snorm_2x16: instr = pco_pck(&tctx->b, dest, @@ -3114,15 +2898,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) .scale = true); break; - case nir_op_unpack_snorm_16: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_S1616, - .scale = true); - break; - case nir_op_unpack_snorm_2x16: instr = pco_unpck(&tctx->b, dest, @@ -3132,15 +2907,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) .scale = true); break; - case nir_op_pack_unorm_16: - instr = pco_pck(&tctx->b, - dest, - src[0], - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_U1616, - .scale = true); - break; - case nir_op_pack_unorm_2x16: instr = pco_pck(&tctx->b, dest, @@ -3150,15 +2916,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) .scale = true); break; - case nir_op_unpack_unorm_16: - instr = pco_unpck(&tctx->b, - dest, - pco_ref_elem(src[0], 0), - .rpt = 1, - .pck_fmt = PCO_PCK_FMT_U1616, - .scale = true); - break; - case nir_op_unpack_unorm_2x16: instr = pco_unpck(&tctx->b, dest,