pco: replace {un,}packing alu ops with intrinsics

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-09-11 12:43:44 +01:00 committed by Marge Bot
parent 8104ef4e01
commit 6edb72d28b
6 changed files with 246 additions and 1243 deletions

View file

@ -90,16 +90,6 @@ constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
}
}
/**
 * Evaluate component 3 of packSnorm3x10_1x2.
 *
 * The input is clamped to [-1, 1], scaled by 1 and rounded with
 * _mesa_roundevenf(). The result is returned as a 2-bit two's-complement
 * field: only bits [1:0] can be set, so the value can be shifted and OR'd
 * into a packed word without touching neighbouring components.
 */
static uint16_t
pack_snorm_1x2(float x)
{
   const int rounded = (int) _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 1.0f);

   /* int -> unsigned conversion is well defined (modulo 2^N); mask afterwards
    * so the sign-extension bits of negative results don't leave the field.
    */
   return (uint16_t) ((unsigned) rounded & 0x0003u);
}
/**
* Evaluate one component of packSnorm4x8.
*/
@ -122,16 +112,6 @@ pack_snorm_1x8(float x)
_mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
}
/**
 * Evaluate component [0,2] of packSnorm3x10_1x2.
 *
 * The input is clamped to [-1, 1], scaled by 511 and rounded with
 * _mesa_roundevenf(). The result is returned as a 10-bit two's-complement
 * field: only bits [9:0] can be set. Without the mask a negative result
 * (e.g. -511 -> 0xFE01) would overwrite the components packed above it once
 * the caller shifts and ORs the fields together.
 */
static uint16_t
pack_snorm_1x10(float x)
{
   const int rounded =
      (int) _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 511.0f);

   /* int -> unsigned conversion is well defined (modulo 2^N); keep only the
    * 10 bits that belong to this component.
    */
   return (uint16_t) ((unsigned) rounded & 0x03FFu);
}
/**
* Evaluate one component of packSnorm2x16.
*/
@ -154,16 +134,6 @@ pack_snorm_1x16(float x)
_mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
}
/**
 * Evaluate component 3 of unpackSnorm3x10_1x2.
 *
 * Only bits [1:0] of \p u are used. They are interpreted as a 2-bit
 * two's-complement integer (0, 1, -2, -1) and mapped to [-1, 1]; the most
 * negative encoding (-2) saturates to -1.
 */
static float
unpack_snorm_1x2(uint16_t u)
{
   /* Sign-extend the 2-bit field: 0b10 -> -2, 0b11 -> -1. */
   const int value = (int) ((u & 0x0003) ^ 0x0002) - 0x0002;

   /* The scale factor is 1, so -2 is the only value outside [-1, 1]. */
   return value < -1 ? -1.0f : (float) value;
}
/**
* Evaluate one component of unpackSnorm4x8.
*/
@ -182,16 +152,6 @@ unpack_snorm_1x8(uint8_t u)
return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
}
/**
 * Evaluate component [0,2] of unpackSnorm3x10_1x2.
 *
 * Only bits [9:0] of \p u are used. They are interpreted as a 10-bit
 * two's-complement integer in [-512, 511], divided by 511 and saturated at
 * -1 (the -512 encoding would otherwise fall just below -1).
 */
static float
unpack_snorm_1x10(uint16_t u)
{
   /* Sign-extend the 10-bit field: bit 9 is the sign bit. */
   const int value = (int) ((u & 0x03FF) ^ 0x0200) - 0x0200;
   const float scaled = (float) value / 511.0f;

   /* The positive side tops out at exactly 511 / 511 = 1, so only the lower
    * bound needs clamping.
    */
   return scaled < -1.0f ? -1.0f : scaled;
}
/**
* Evaluate one component of unpackSnorm2x16.
*/
@ -210,16 +170,6 @@ unpack_snorm_1x16(uint16_t u)
return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}
/**
* Evaluate component 3 of packUnorm3x10_1x2.
*/
static uint16_t
pack_unorm_1x2(float x)
{
return (uint16_t) (int)
_mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 3.0f);
}
/**
* Evaluate one component of packUnorm4x8.
*/
@ -239,16 +189,6 @@ pack_unorm_1x8(float x)
_mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}
/**
* Evaluate component [0,2] of packUnorm3x10_1x2.
*/
static uint16_t
pack_unorm_1x10(float x)
{
return (uint16_t) (int)
_mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 1023.0f);
}
/**
* Evaluate one component of packUnorm2x16.
*/
@ -268,16 +208,6 @@ pack_unorm_1x16(float x)
_mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}
/**
 * Evaluate component 3 of unpackUnorm3x10_1x2.
 *
 * Bits [1:0] of the input are mapped linearly onto [0, 1]; all other bits
 * are ignored.
 */
static float
unpack_unorm_1x2(uint16_t u)
{
   const uint16_t field = u & 0x0003;

   return (float) field / 3.0f;
}
/**
* Evaluate one component of unpackUnorm4x8.
*/
@ -296,16 +226,6 @@ unpack_unorm_1x8(uint8_t u)
return (float) u / 255.0f;
}
/**
 * Evaluate component [0,2] of unpackUnorm3x10_1x2.
 *
 * Bits [9:0] of the input are mapped linearly onto [0, 1]; all other bits
 * are ignored.
 */
static float
unpack_unorm_1x10(uint16_t u)
{
   const uint16_t field = u & 0x03FF;

   return (float) field / 1023.0f;
}
/**
* Evaluate one component of unpackUnorm2x16.
*/
@ -324,95 +244,6 @@ unpack_unorm_1x16(uint16_t u)
return (float) u / 65535.0f;
}
/**
* Evaluate component 3 of packUscaled3x10_1x2.
*/
static uint16_t
pack_uscaled_1x2(float x)
{
return (uint16_t) (int)
_mesa_roundevenf(CLAMP(x, 0.0f, 3.0f));
}
/**
* Evaluate component [0,2] of packUscaled3x10_1x2.
*/
static uint16_t
pack_uscaled_1x10(float x)
{
return (uint16_t) (int)
_mesa_roundevenf(CLAMP(x, 0.0f, 1023.0f));
}
/**
 * Evaluate component 3 of packSscaled3x10_1x2.
 *
 * The input is limited to the 2-bit signed range [-2, 1] and rounded with
 * _mesa_roundevenf(). The result is returned as a 2-bit two's-complement
 * field: only bits [1:0] can be set.
 */
static uint16_t
pack_sscaled_1x2(float x)
{
   const int rounded = (int) _mesa_roundevenf(CLAMP(x, -2.0f, +1.0f));

   /* int -> unsigned conversion is well defined (modulo 2^N); mask afterwards
    * so the sign-extension bits of negative results don't leave the field.
    */
   return (uint16_t) ((unsigned) rounded & 0x0003u);
}
/**
 * Evaluate component [0,2] of packSscaled3x10_1x2.
 *
 * The input is limited to the 10-bit signed range [-512, 511] and rounded
 * with _mesa_roundevenf(). The result is returned as a 10-bit
 * two's-complement field: only bits [9:0] can be set. Without the mask a
 * negative result would overwrite the components packed above it once the
 * caller shifts and ORs the fields together.
 */
static uint16_t
pack_sscaled_1x10(float x)
{
   const int rounded = (int) _mesa_roundevenf(CLAMP(x, -512.0f, +511.0f));

   /* int -> unsigned conversion is well defined (modulo 2^N); keep only the
    * 10 bits that belong to this component.
    */
   return (uint16_t) ((unsigned) rounded & 0x03FFu);
}
/**
 * Evaluate component 3 of unpackSscaled3x10_1x2.
 *
 * Only bits [1:0] of \p u are used. They are interpreted as a 2-bit
 * two's-complement integer and converted to float unscaled, giving a value
 * in [-2, 1].
 */
static float
unpack_sscaled_1x2(uint16_t u)
{
   /* Sign-extend the 2-bit field: 0b10 -> -2, 0b11 -> -1. The result is
    * already within [-2, 1], so no further clamping is needed.
    */
   const int value = (int) ((u & 0x0003) ^ 0x0002) - 0x0002;

   return (float) value;
}
/**
 * Evaluate one component of unpackSscaled4x8.
 *
 * Reinterprets the byte as a signed 8-bit integer and returns it as a float,
 * clamped to [-128, 127].
 */
static float
unpack_sscaled_1x8(uint8_t u)
{
   const int8_t value = (int8_t) u;

   return CLAMP(value, -128.0f, +127.0f);
}
/**
 * Evaluate component [0,2] of unpackSscaled3x10_1x2.
 *
 * Only bits [9:0] of \p u are used. They are interpreted as a 10-bit
 * two's-complement integer and converted to float unscaled, giving a value
 * in [-512, 511].
 */
static float
unpack_sscaled_1x10(uint16_t u)
{
   /* Sign-extend the 10-bit field: bit 9 is the sign bit. The result is
    * already within [-512, 511], so no further clamping is needed.
    */
   const int value = (int) ((u & 0x03FF) ^ 0x0200) - 0x0200;

   return (float) value;
}
/**
 * Evaluate component 3 of unpackUscaled3x10_1x2.
 *
 * Bits [1:0] of the input are returned as an unscaled float in [0, 3]; all
 * other bits are ignored.
 */
static float
unpack_uscaled_1x2(uint16_t u)
{
   const uint16_t field = u & 0x0003;

   return (float) field;
}
/**
 * Evaluate component [0,2] of unpackUscaled3x10_1x2.
 *
 * Bits [9:0] of the input are returned as an unscaled float in [0, 1023];
 * all other bits are ignored.
 */
static float
unpack_uscaled_1x10(uint16_t u)
{
   const uint16_t field = u & 0x03FF;

   return (float) field;
}
/**
* Evaluate one component of packHalf2x16.
*/

View file

@ -2754,4 +2754,6 @@ intrinsic("alpha_to_coverage_pco", src_comp=[1], dest_comp=1, flags=[CAN_REORDER
# Boolean intrinsic indices "scale" and "roundzero".
index("bool", "scale")
index("bool", "roundzero")
# Format-driven pack/unpack intrinsics: both carry a FORMAT index and operate
# on 32-bit values.
# NOTE(review): a src_comp/dest_comp of 0 presumably means the component count
# is chosen per instruction - confirm against the intrinsic() helper.
intrinsic("pack_pco", src_comp=[0], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER], indices=[FORMAT], bit_sizes=[32])
intrinsic("unpack_pco", src_comp=[1], dest_comp=0, flags=[CAN_ELIMINATE, CAN_REORDER], indices=[FORMAT], bit_sizes=[32])
# Two-source pack intrinsic parameterised by the SCALE and ROUNDZERO indices.
intrinsic("pck_prog_pco", src_comp=[0, 1], dest_comp=0, flags=[CAN_ELIMINATE, CAN_REORDER], indices=[SCALE, ROUNDZERO], bit_sizes=[32])

View file

@ -278,20 +278,6 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
*/
return NULL;
case nir_op_unpack_snorm_8_8:
case nir_op_unpack_sscaled_8_8:
case nir_op_unpack_unorm_8_8:
case nir_op_unpack_snorm_8_8_8:
case nir_op_unpack_sscaled_8_8_8:
case nir_op_unpack_unorm_8_8_8:
case nir_op_unpack_sscaled_8_8_8_8:
case nir_op_unpack_snorm_10_10_10_2:
case nir_op_unpack_unorm_10_10_10_2:
case nir_op_unpack_sscaled_10_10_10_2:
case nir_op_unpack_uscaled_10_10_10_2:
case nir_op_unpack_float_11_11_10:
return NULL;
case nir_op_unpack_half_2x16: {
if (!b->shader->options->lower_unpack_half_2x16)
return NULL;

View file

@ -1810,124 +1810,3 @@ unop_numeric_convert("f2e4m3fn_satfn", tuint8, tfloat32, "isinf(src0) ? 0x7f : _
unop_numeric_convert("e5m22f", tfloat32, tuint8, "_mesa_e5m2_to_float(src0)")
unop_numeric_convert("f2e5m2", tuint8, tfloat32, "_mesa_float_to_e5m2(src0)")
unop_numeric_convert("f2e5m2_sat", tuint8, tfloat32, "_mesa_float_to_e5m2_sat(src0)")
# Define the opcode pack_{fmt}_{r}: one float32 source component is converted
# by the C helper pack_{fmt}_1x{r}() and written to the single uint32
# destination component.
#   fmt: format name used in the opcode and helper names (e.g. "snorm").
#   r:   bit width of the component.
def pack_r(fmt, r):
unop_horiz(f"pack_{fmt}_{r}", 1, tuint32, 1, tfloat32, f"""
dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
""")
# Define the opcode pack_{fmt}_{r}_{g}: two float32 source components are
# converted by pack_{fmt}_1x{r}() / pack_{fmt}_1x{g}() and combined into one
# uint32, with the first component in the low bits and the second shifted
# left by r bits.
def pack_rg(fmt, r, g):
unop_horiz(f"pack_{fmt}_{r}_{g}", 1, tuint32, 2, tfloat32, f"""
dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r};
""")
# Define the opcode pack_{fmt}_{r}_{g}_{b}: three float32 source components
# are converted by the per-width pack_{fmt}_1x*() helpers and combined into
# one uint32 at bit offsets 0, r and r + g.
def pack_rgb(fmt, r, g, b):
unop_horiz(f"pack_{fmt}_{r}_{g}_{b}", 1, tuint32, 3, tfloat32, f"""
dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r};
dst.x |= ((uint32_t) pack_{fmt}_1x{b}(src0.z)) << {r + g};
""")
# Define the opcode pack_{fmt}_{r}_{g}_{b}_{a}: four float32 source
# components are converted by the per-width pack_{fmt}_1x*() helpers and
# combined into one uint32 at bit offsets 0, r, r + g and r + g + b.
def pack_rgba(fmt, r, g, b, a):
unop_horiz(f"pack_{fmt}_{r}_{g}_{b}_{a}", 1, tuint32, 4, tfloat32, f"""
dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r};
dst.x |= ((uint32_t) pack_{fmt}_1x{b}(src0.z)) << {r + g};
dst.x |= ((uint32_t) pack_{fmt}_1x{a}(src0.w)) << {r + g + b};
""")
# Instantiate the pack opcodes for each format / channel layout.
# 8-bit normalized, one to three components.
pack_r("snorm", 8)
pack_r("unorm", 8)
pack_rg("snorm", 8, 8)
pack_rg("unorm", 8, 8)
pack_rgb("snorm", 8, 8, 8)
pack_rgb("unorm", 8, 8, 8)
# Single 10-bit and 16-bit components.
pack_r("snorm", 10)
pack_r("unorm", 10)
pack_r("snorm", 16)
pack_r("unorm", 16)
pack_r("half", 16)
# 10/10/10/2 layouts.
pack_rgba("snorm", 10, 10, 10, 2)
pack_rgba("unorm", 10, 10, 10, 2)
pack_rgba("sscaled", 10, 10, 10, 2)
pack_rgba("uscaled", 10, 10, 10, 2)
# Small-float packs call f32_to_uf10() / f32_to_uf11() directly and mask each
# result to its field width (10 or 11 bits).
unop_horiz(f"pack_float_10", 1, tuint32, 1, tfloat32, f"""
dst.x = f32_to_uf10(src0.x) & 0x3ff;
""")
unop_horiz(f"pack_float_11", 1, tuint32, 1, tfloat32, f"""
dst.x = f32_to_uf11(src0.x) & 0x7ff;
""")
# 11/11/10 layout: x in bits [10:0], y in bits [21:11], z in bits [31:22].
unop_horiz(f"pack_float_11_11_10", 1, tuint32, 3, tfloat32, f"""
dst.x = f32_to_uf11(src0.x) & 0x7ff;
dst.x |= (f32_to_uf11(src0.y) & 0x7ff) << 11;
dst.x |= (f32_to_uf10(src0.z) & 0x3ff) << 22;
""")
# Define the opcode unpack_{fmt}_{r}: the low r bits of the uint32 source are
# converted by the C helper unpack_{fmt}_1x{r}() into one float32 component.
#   fmt: format name used in the opcode and helper names (e.g. "snorm").
#   r:   bit width of the component.
def unpack_r(fmt, r):
unop_horiz(f"unpack_{fmt}_{r}", 1, tfloat32, 1, tuint32, f"""
dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
""")
# Define the opcode unpack_{fmt}_{r}_{g}: two fields are extracted from the
# uint32 source (r bits at offset 0, g bits at offset r) and converted by the
# per-width unpack_{fmt}_1x*() helpers into two float32 components.
def unpack_rg(fmt, r, g):
unop_horiz(f"unpack_{fmt}_{r}_{g}", 2, tfloat32, 1, tuint32, f"""
dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1));
""")
# Define the opcode unpack_{fmt}_{r}_{g}_{b}: three fields are extracted from
# the uint32 source at bit offsets 0, r and r + g and converted by the
# per-width unpack_{fmt}_1x*() helpers into three float32 components.
def unpack_rgb(fmt, r, g, b):
unop_horiz(f"unpack_{fmt}_{r}_{g}_{b}", 3, tfloat32, 1, tuint32, f"""
dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1));
dst.z = unpack_{fmt}_1x{b}((src0.x >> {r + g}) & ((1u << {b}) - 1));
""")
# Define the opcode unpack_{fmt}_{r}_{g}_{b}_{a}: four fields are extracted
# from the uint32 source at bit offsets 0, r, r + g and r + g + b and
# converted by the per-width unpack_{fmt}_1x*() helpers into four float32
# components.
def unpack_rgba(fmt, r, g, b, a):
unop_horiz(f"unpack_{fmt}_{r}_{g}_{b}_{a}", 4, tfloat32, 1, tuint32, f"""
dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1));
dst.z = unpack_{fmt}_1x{b}((src0.x >> {r + g}) & ((1u << {b}) - 1));
dst.w = unpack_{fmt}_1x{a}((src0.x >> {r + g + b}) & ((1u << {a}) - 1));
""")
# Instantiate the unpack opcodes for each format / channel layout.
# 8-bit formats, one to four components.
unpack_r("snorm", 8)
unpack_r("sscaled", 8)
unpack_r("unorm", 8)
unpack_rg("snorm", 8, 8)
unpack_rg("sscaled", 8, 8)
unpack_rg("unorm", 8, 8)
unpack_rgb("snorm", 8, 8, 8)
unpack_rgb("sscaled", 8, 8, 8)
unpack_rgb("unorm", 8, 8, 8)
unpack_rgba("sscaled", 8, 8, 8, 8)
# Single 16-bit components.
unpack_r("snorm", 16)
unpack_r("unorm", 16)
# Half-float unpack is written out by hand because unpack_half_1x16() takes an
# extra flush-denorms-to-zero argument.
unop_horiz(f"unpack_half_16", 1, tfloat32, 1, tuint32, """
dst.x = unpack_half_1x16(src0.x & ((1u << 16) - 1), nir_is_denorm_flush_to_zero(execution_mode, 16));
""")
# 10/10/10/2 layouts.
unpack_rgba("snorm", 10, 10, 10, 2)
unpack_rgba("unorm", 10, 10, 10, 2)
unpack_rgba("sscaled", 10, 10, 10, 2)
unpack_rgba("uscaled", 10, 10, 10, 2)
# 11/11/10 small-float layout: x from bits [10:0], y from bits [21:11],
# z from bits [31:22].
unop_horiz(f"unpack_float_11_11_10", 3, tfloat32, 1, tuint32, f"""
dst.x = uf11_to_f32(src0.x & 0x7ff);
dst.y = uf11_to_f32((src0.x >> 11) & 0x7ff);
dst.z = uf10_to_f32((src0.x >> 22) & 0x3ff);
""")

View file

@ -118,6 +118,30 @@ to_pbe_format(nir_builder *b, enum pipe_format format, nir_def **input)
return format;
}
/**
 * Returns how many of a format's leading channels fit in a single dword.
 *
 * Channel sizes are accumulated in order until 32 bits are reached or
 * exceeded. The check is made right after each channel is added, so an
 * overshoot caused by the final channel is detected as well.
 *
 * format: pipe format to inspect.
 * Returns the number of channels that fit in 32 bits.
 */
static unsigned format_chans_per_dword(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);
   unsigned accum_bits = 0;

   for (unsigned u = 0; u < desc->nr_channels; ++u) {
      accum_bits += desc->channel[u].size;

      /* Exactly one dword, great! */
      if (accum_bits == 32)
         return u + 1;

      /* Went over, channel u doesn't fit. */
      if (accum_bits > 32) {
         /* We don't support formats with channels > 1 dword. */
         assert(u > 0);
         return u;
      }
   }

   /* Loop finished, all channels can fit. */
   return desc->nr_channels;
}
static nir_def *pack_to_format(nir_builder *b,
nir_def *input,
nir_alu_type src_type,
@ -125,628 +149,153 @@ static nir_def *pack_to_format(nir_builder *b,
{
const struct util_format_description *desc = util_format_description(format);
src_type = nir_alu_type_get_base_type(src_type);
nir_def *zero = nir_imm_int(b, 0);
nir_def *one = nir_alu_type_get_base_type(src_type) == nir_type_float
? nir_imm_float(b, 1.0f)
: nir_imm_int(b, 1);
nir_def *input_comps[4];
for (unsigned u = 0; u < desc->nr_channels; ++u) {
/* Populate any missing components. */
for (unsigned u = 0; u < ARRAY_SIZE(input_comps); ++u) {
enum pipe_swizzle s = desc->swizzle[u];
if (s <= PIPE_SWIZZLE_W) {
if (s <= PIPE_SWIZZLE_W)
input_comps[u] = nir_channel(b, input, s);
} else if (s == PIPE_SWIZZLE_0) {
input_comps[u] = nir_imm_int(b, 0);
} else if (s == PIPE_SWIZZLE_1) {
input_comps[u] = src_type == nir_type_float ? nir_imm_float(b, 1.0f)
: nir_imm_int(b, 1);
} else {
else if (s == PIPE_SWIZZLE_0)
input_comps[u] = zero;
else if (s == PIPE_SWIZZLE_1)
input_comps[u] = one;
else
UNREACHABLE("");
}
unsigned format_bits = util_format_get_blocksizebits(format);
unsigned format_dwords = DIV_ROUND_UP(format_bits, 32);
nir_def *packed_comps[] = { zero, zero, zero, zero };
/* Special case: no packing required. */
if (util_format_get_max_channel_size(format) == 32)
return nir_vec(b, input_comps, format_dwords);
/* Special case: can't be packed with op, need bit-packing instead. */
if (util_format_is_pure_integer(format)) {
for (unsigned u = 0; u < desc->nr_channels; ++u) {
unsigned dword = desc->channel[u].shift / 32;
unsigned offset = desc->channel[u].shift % 32;
unsigned size = desc->channel[u].size;
packed_comps[dword] = nir_bitfield_insert_imm(b,
packed_comps[dword],
input_comps[u],
offset,
size);
}
return nir_vec(b, packed_comps, format_dwords);
}
unsigned chans_per_dword = format_chans_per_dword(format);
unsigned chans_remaining = desc->nr_channels;
input = nir_vec(b, input_comps, desc->nr_channels);
for (unsigned u = 0; u < format_dwords; ++u) {
unsigned chans_to_pack =
chans_remaining > chans_per_dword ? chans_per_dword : chans_remaining;
unsigned chans_packed = desc->nr_channels - chans_remaining;
nir_def *zero = nir_imm_int(b, 0);
nir_def *packed[4] = { zero, zero, zero, zero };
switch (format) {
case PIPE_FORMAT_R8_UNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_unorm_8(b, input);
break;
nir_def *input_chans =
nir_channels(b, input, BITFIELD_RANGE(chans_packed, chans_to_pack));
packed_comps[u] = nir_pack_pco(b, input_chans, .format = format);
case PIPE_FORMAT_R8G8_UNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_unorm_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8_UNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_unorm_8_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8A8_UNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_unorm_4x8(b, input);
break;
case PIPE_FORMAT_R8_SNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_snorm_8(b, input);
break;
case PIPE_FORMAT_R8G8_SNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_snorm_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8_SNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_snorm_8_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8A8_SNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_snorm_4x8(b, input);
break;
case PIPE_FORMAT_R8G8B8A8_UINT:
case PIPE_FORMAT_R8G8B8A8_SINT:
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[3], 24, 8);
FALLTHROUGH;
case PIPE_FORMAT_R8G8B8_UINT:
case PIPE_FORMAT_R8G8B8_SINT:
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[2], 16, 8);
FALLTHROUGH;
case PIPE_FORMAT_R8G8_UINT:
case PIPE_FORMAT_R8G8_SINT:
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[1], 8, 8);
FALLTHROUGH;
case PIPE_FORMAT_R8_UINT:
case PIPE_FORMAT_R8_SINT:
assert(src_type != nir_type_float);
/* TODO: sat/clamp? */
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[0], 0, 8);
break;
case PIPE_FORMAT_R10G10B10A2_UINT:
assert(src_type == nir_type_uint);
/* TODO: sat/clamp? */
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[0], 0, 10);
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[1], 10, 10);
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[2], 20, 10);
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[3], 30, 2);
break;
case PIPE_FORMAT_R11G11B10_FLOAT:
assert(src_type == nir_type_float);
packed[0] = nir_pack_float_11_11_10(b, input_comps[0]);
break;
case PIPE_FORMAT_R16_UNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_unorm_16(b, input_comps[0]);
break;
case PIPE_FORMAT_R16G16_UNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_unorm_2x16(b, input);
break;
case PIPE_FORMAT_R16G16B16_UNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_unorm_2x16(b, nir_channels(b, input, 0b0011));
packed[1] = nir_pack_unorm_16(b, input_comps[2]);
break;
case PIPE_FORMAT_R16G16B16A16_UNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_unorm_2x16(b, nir_channels(b, input, 0b0011));
packed[1] = nir_pack_unorm_2x16(b, nir_channels(b, input, 0b1100));
break;
case PIPE_FORMAT_R16_SNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_snorm_16(b, input_comps[0]);
break;
case PIPE_FORMAT_R16G16_SNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_snorm_2x16(b, input);
break;
case PIPE_FORMAT_R16G16B16_SNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_snorm_2x16(b, nir_channels(b, input, 0b0011));
packed[1] = nir_pack_snorm_16(b, input_comps[2]);
break;
case PIPE_FORMAT_R16G16B16A16_SNORM:
assert(src_type == nir_type_float);
packed[0] = nir_pack_snorm_2x16(b, nir_channels(b, input, 0b0011));
packed[1] = nir_pack_snorm_2x16(b, nir_channels(b, input, 0b1100));
break;
case PIPE_FORMAT_R16G16B16A16_UINT:
case PIPE_FORMAT_R16G16B16A16_SINT:
packed[1] = nir_bitfield_insert_imm(b, packed[1], input_comps[3], 16, 16);
FALLTHROUGH;
case PIPE_FORMAT_R16G16B16_UINT:
case PIPE_FORMAT_R16G16B16_SINT:
packed[1] = nir_bitfield_insert_imm(b, packed[1], input_comps[2], 0, 16);
FALLTHROUGH;
case PIPE_FORMAT_R16G16_UINT:
case PIPE_FORMAT_R16G16_SINT:
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[1], 16, 16);
FALLTHROUGH;
case PIPE_FORMAT_R16_UINT:
case PIPE_FORMAT_R16_SINT:
assert(src_type != nir_type_float);
/* TODO: sat/clamp? */
packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[0], 0, 16);
break;
case PIPE_FORMAT_R16_FLOAT:
assert(src_type == nir_type_float);
packed[0] = nir_pack_half_16(b, input);
break;
case PIPE_FORMAT_R16G16_FLOAT:
assert(src_type == nir_type_float);
packed[0] = nir_pack_half_2x16(b, input);
break;
case PIPE_FORMAT_R16G16B16_FLOAT:
assert(src_type == nir_type_float);
packed[0] = nir_pack_half_2x16(b, nir_channels(b, input, 0b0011));
packed[1] = nir_pack_half_16(b, input_comps[2]);
break;
case PIPE_FORMAT_R16G16B16A16_FLOAT:
assert(src_type == nir_type_float);
packed[0] = nir_pack_half_2x16(b, nir_channels(b, input, 0b0011));
packed[1] = nir_pack_half_2x16(b, nir_channels(b, input, 0b1100));
break;
case PIPE_FORMAT_R32G32B32A32_UINT:
case PIPE_FORMAT_R32G32B32A32_SINT:
case PIPE_FORMAT_R32G32B32A32_FLOAT:
packed[3] = input_comps[3];
FALLTHROUGH;
case PIPE_FORMAT_R32G32B32_UINT:
case PIPE_FORMAT_R32G32B32_SINT:
case PIPE_FORMAT_R32G32B32_FLOAT:
packed[2] = input_comps[2];
FALLTHROUGH;
case PIPE_FORMAT_R32G32_UINT:
case PIPE_FORMAT_R32G32_SINT:
case PIPE_FORMAT_R32G32_FLOAT:
packed[1] = input_comps[1];
FALLTHROUGH;
case PIPE_FORMAT_R32_UINT:
case PIPE_FORMAT_R32_SINT:
case PIPE_FORMAT_R32_FLOAT:
packed[0] = input_comps[0];
break;
default:
printf("Unsupported pack format %s.\n", util_format_name(format));
UNREACHABLE("");
chans_remaining -= chans_to_pack;
}
unsigned packed_comps = 1;
if (packed[3] != zero)
packed_comps = 4;
else if (packed[2] != zero)
packed_comps = 3;
else if (packed[1] != zero)
packed_comps = 2;
assert(packed[0] != zero);
return nir_vec(b, packed, packed_comps);
assert(!chans_remaining);
return nir_vec(b, packed_comps, format_dwords);
}
static nir_def *unpack_from_format(nir_builder *b,
nir_def *input,
nir_def *packed_comps[static 4],
nir_alu_type dest_type,
enum pipe_format format)
enum pipe_format format,
unsigned components_needed)
{
const struct util_format_description *desc = util_format_description(format);
dest_type = nir_alu_type_get_base_type(dest_type);
nir_def *unpacked_comps[4];
nir_def *input_comps[4] = {
nir_channel(b, input, 0),
nir_channel(b, input, 1),
nir_channel(b, input, 2),
nir_channel(b, input, 3),
};
unsigned format_bits = util_format_get_blocksizebits(format);
unsigned format_dwords = DIV_ROUND_UP(format_bits, 32);
nir_def *unpacked = nir_undef(b, 4, 32);
switch (format) {
case PIPE_FORMAT_R8_UNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_unorm_8(b, input);
break;
case PIPE_FORMAT_R8G8_UNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_unorm_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8_UNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_unorm_8_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8A8_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_unorm_4x8(b, input);
break;
case PIPE_FORMAT_R8_SNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_snorm_8(b, input);
break;
case PIPE_FORMAT_R8G8_SNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_snorm_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8_SNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_snorm_8_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8A8_SNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_snorm_4x8(b, input);
break;
case PIPE_FORMAT_R8_SSCALED:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_sscaled_8(b, input);
break;
case PIPE_FORMAT_R8G8_SSCALED:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_sscaled_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8_SSCALED:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_sscaled_8_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8A8_SSCALED:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_sscaled_8_8_8_8(b, input);
break;
case PIPE_FORMAT_R8G8B8A8_UINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ubitfield_extract_imm(b, input_comps[0], 24, 8),
3);
FALLTHROUGH;
case PIPE_FORMAT_R8G8B8_UINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ubitfield_extract_imm(b, input_comps[0], 16, 8),
2);
FALLTHROUGH;
case PIPE_FORMAT_R8G8_UINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ubitfield_extract_imm(b, input_comps[0], 8, 8),
1);
FALLTHROUGH;
case PIPE_FORMAT_R8_UINT:
assert(dest_type == nir_type_uint);
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ubitfield_extract_imm(b, input_comps[0], 0, 8),
0);
break;
case PIPE_FORMAT_R8G8B8A8_SINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ibitfield_extract_imm(b, input_comps[0], 24, 8),
3);
FALLTHROUGH;
case PIPE_FORMAT_R8G8B8_SINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ibitfield_extract_imm(b, input_comps[0], 16, 8),
2);
FALLTHROUGH;
case PIPE_FORMAT_R8G8_SINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ibitfield_extract_imm(b, input_comps[0], 8, 8),
1);
FALLTHROUGH;
case PIPE_FORMAT_R8_SINT:
assert(dest_type == nir_type_int);
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ibitfield_extract_imm(b, input_comps[0], 0, 8),
0);
break;
case PIPE_FORMAT_R10G10B10A2_UNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_unorm_10_10_10_2(b, input_comps[0]);
break;
case PIPE_FORMAT_R10G10B10A2_SNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_snorm_10_10_10_2(b, input_comps[0]);
break;
case PIPE_FORMAT_R10G10B10A2_UINT:
assert(dest_type == nir_type_uint);
unpacked = nir_vec4(b,
nir_ubitfield_extract_imm(b, input_comps[0], 0, 10),
nir_ubitfield_extract_imm(b, input_comps[0], 10, 10),
nir_ubitfield_extract_imm(b, input_comps[0], 20, 10),
nir_ubitfield_extract_imm(b, input_comps[0], 30, 2));
break;
case PIPE_FORMAT_R10G10B10A2_USCALED:
case PIPE_FORMAT_B10G10R10A2_USCALED:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_uscaled_10_10_10_2(b, input_comps[0]);
break;
case PIPE_FORMAT_R10G10B10A2_SSCALED:
case PIPE_FORMAT_B10G10R10A2_SSCALED:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_sscaled_10_10_10_2(b, input_comps[0]);
break;
case PIPE_FORMAT_R11G11B10_FLOAT:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_float_11_11_10(b, input_comps[0]);
break;
case PIPE_FORMAT_R16_UNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_unorm_16(b, input_comps[0]);
break;
case PIPE_FORMAT_R16G16_UNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_unorm_2x16(b, input_comps[0]);
break;
case PIPE_FORMAT_R16G16B16_UNORM: {
assert(dest_type == nir_type_float);
nir_def *lo2 = nir_unpack_unorm_2x16(b, input_comps[0]);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
unpacked = nir_vector_insert_imm(b,
unpacked,
nir_unpack_unorm_16(b, input_comps[1]),
2);
break;
/* Special case: no unpacking required. */
if (util_format_get_max_channel_size(format) == 32) {
for (unsigned u = 0; u < desc->nr_channels; ++u)
unpacked_comps[u] = packed_comps[u];
}
case PIPE_FORMAT_R16G16B16A16_UNORM:
assert(dest_type == nir_type_float);
nir_def *lo2 = nir_unpack_unorm_2x16(b, input_comps[0]);
nir_def *hi2 = nir_unpack_unorm_2x16(b, input_comps[1]);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 0), 2);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 1), 3);
break;
/* Special case: can't be unpacked with op, need bit-unpacking instead. */
else if (util_format_is_pure_integer(format)) {
nir_def *(*nir_bitfield_extract_imm)(nir_builder *,
nir_def *,
uint32_t,
uint32_t) =
util_format_is_pure_uint(format) ? nir_ubitfield_extract_imm
: nir_ibitfield_extract_imm;
case PIPE_FORMAT_R16_SNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_snorm_16(b, input_comps[0]);
break;
for (unsigned u = 0; u < desc->nr_channels; ++u) {
unsigned dword = desc->channel[u].shift / 32;
unsigned offset = desc->channel[u].shift % 32;
unsigned size = desc->channel[u].size;
case PIPE_FORMAT_R16G16_SNORM:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_snorm_2x16(b, input_comps[0]);
break;
case PIPE_FORMAT_R16G16B16_SNORM: {
assert(dest_type == nir_type_float);
nir_def *lo2 = nir_unpack_snorm_2x16(b, input_comps[0]);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
unpacked = nir_vector_insert_imm(b,
unpacked,
nir_unpack_snorm_16(b, input_comps[1]),
2);
break;
}
case PIPE_FORMAT_R16G16B16A16_SNORM: {
assert(dest_type == nir_type_float);
nir_def *lo2 = nir_unpack_snorm_2x16(b, input_comps[0]);
nir_def *hi2 = nir_unpack_snorm_2x16(b, input_comps[1]);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 0), 2);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 1), 3);
break;
}
case PIPE_FORMAT_R16G16B16A16_UINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ubitfield_extract_imm(b, input_comps[1], 16, 16),
3);
FALLTHROUGH;
case PIPE_FORMAT_R16G16B16_UINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ubitfield_extract_imm(b, input_comps[1], 0, 16),
2);
FALLTHROUGH;
case PIPE_FORMAT_R16G16_UINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ubitfield_extract_imm(b, input_comps[0], 16, 16),
1);
FALLTHROUGH;
case PIPE_FORMAT_R16_UINT:
assert(dest_type == nir_type_uint);
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ubitfield_extract_imm(b, input_comps[0], 0, 16),
0);
break;
case PIPE_FORMAT_R16G16B16A16_SINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ibitfield_extract_imm(b, input_comps[1], 16, 16),
3);
FALLTHROUGH;
case PIPE_FORMAT_R16G16B16_SINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ibitfield_extract_imm(b, input_comps[1], 0, 16),
2);
FALLTHROUGH;
case PIPE_FORMAT_R16G16_SINT:
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ibitfield_extract_imm(b, input_comps[0], 16, 16),
1);
FALLTHROUGH;
case PIPE_FORMAT_R16_SINT:
assert(dest_type == nir_type_int);
unpacked = nir_vector_insert_imm(
b,
unpacked,
nir_ibitfield_extract_imm(b, input_comps[0], 0, 16),
0);
break;
case PIPE_FORMAT_R16_FLOAT:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_half_16(b, input_comps[0]);
break;
case PIPE_FORMAT_R16G16_FLOAT:
assert(dest_type == nir_type_float);
unpacked = nir_unpack_half_2x16(b, input_comps[0]);
break;
case PIPE_FORMAT_R16G16B16_FLOAT: {
assert(dest_type == nir_type_float);
nir_def *lo2 = nir_unpack_half_2x16(b, input_comps[0]);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
unpacked = nir_vector_insert_imm(b,
unpacked,
nir_unpack_half_16(b, input_comps[1]),
2);
break;
}
case PIPE_FORMAT_R16G16B16A16_FLOAT: {
assert(dest_type == nir_type_float);
nir_def *lo2 = nir_unpack_half_2x16(b, input_comps[0]);
nir_def *hi2 = nir_unpack_half_2x16(b, input_comps[1]);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 0), 2);
unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 1), 3);
break;
}
case PIPE_FORMAT_R32G32B32A32_UINT:
case PIPE_FORMAT_R32G32B32A32_SINT:
case PIPE_FORMAT_R32G32B32A32_FLOAT:
unpacked = nir_vector_insert_imm(b, unpacked, input_comps[3], 3);
FALLTHROUGH;
case PIPE_FORMAT_R32G32B32_UINT:
case PIPE_FORMAT_R32G32B32_SINT:
case PIPE_FORMAT_R32G32B32_FLOAT:
unpacked = nir_vector_insert_imm(b, unpacked, input_comps[2], 2);
FALLTHROUGH;
case PIPE_FORMAT_R32G32_UINT:
case PIPE_FORMAT_R32G32_SINT:
case PIPE_FORMAT_R32G32_FLOAT:
unpacked = nir_vector_insert_imm(b, unpacked, input_comps[1], 1);
FALLTHROUGH;
case PIPE_FORMAT_R32_UINT:
case PIPE_FORMAT_R32_SINT:
case PIPE_FORMAT_R32_FLOAT:
unpacked = nir_vector_insert_imm(b, unpacked, input_comps[0], 0);
break;
default:
printf("Unsupported unpack format %s.\n", util_format_name(format));
UNREACHABLE("");
}
nir_def *output_comps[4];
for (unsigned u = 0; u < ARRAY_SIZE(output_comps); ++u) {
enum pipe_swizzle s = desc->swizzle[u];
if (s <= PIPE_SWIZZLE_W) {
output_comps[u] = nir_channel(b, unpacked, s);
} else if (s == PIPE_SWIZZLE_0) {
output_comps[u] = nir_imm_int(b, 0);
} else if (s == PIPE_SWIZZLE_1) {
output_comps[u] = dest_type == nir_type_float ? nir_imm_float(b, 1.0f)
: nir_imm_int(b, 1);
} else {
UNREACHABLE("");
unpacked_comps[u] =
nir_bitfield_extract_imm(b, packed_comps[dword], offset, size);
}
}
return nir_vec(b, output_comps, ARRAY_SIZE(output_comps));
else {
unsigned chans_per_dword = format_chans_per_dword(format);
unsigned chans_remaining = desc->nr_channels;
for (unsigned u = 0; u < format_dwords; ++u) {
unsigned chans_to_unpack = chans_remaining > chans_per_dword
? chans_per_dword
: chans_remaining;
nir_def *unpacked = nir_unpack_pco(b,
chans_to_unpack,
packed_comps[u],
.format = format);
unsigned chans_unpacked = desc->nr_channels - chans_remaining;
for (unsigned v = 0; v < chans_to_unpack; ++v)
unpacked_comps[chans_unpacked + v] = nir_channel(b, unpacked, v);
chans_remaining -= chans_to_unpack;
}
assert(!chans_remaining);
}
nir_def *zero = nir_imm_int(b, 0);
nir_def *one = nir_alu_type_get_base_type(dest_type) == nir_type_float
? nir_imm_float(b, 1.0f)
: nir_imm_int(b, 1);
nir_def *output_comps[4];
/* Populate any missing components. */
for (unsigned u = 0; u < ARRAY_SIZE(output_comps); ++u) {
enum pipe_swizzle s = desc->swizzle[u];
if (s <= PIPE_SWIZZLE_W)
output_comps[u] = unpacked_comps[s];
else if (s == PIPE_SWIZZLE_0)
output_comps[u] = zero;
else if (s == PIPE_SWIZZLE_1)
output_comps[u] = one;
else
UNREACHABLE("");
}
return nir_vec(b, output_comps, components_needed);
}
static inline bool is_processed(nir_intrinsic_instr *intr)
@ -854,30 +403,29 @@ static nir_def *lower_pfo_load(nir_builder *b,
format = to_pbe_format(b, format, NULL);
nir_def *input_comps[4];
for (unsigned c = 0; c < ARRAY_SIZE(input_comps); ++c) {
input_comps[c] = nir_load_output(b,
1,
32,
offset->ssa,
.base = base,
.component = c,
.dest_type = nir_type_invalid | 32,
.io_semantics = io_semantics);
nir_def *packed_comps[4];
for (unsigned c = 0; c < ARRAY_SIZE(packed_comps); ++c) {
packed_comps[c] = nir_load_output(b,
1,
32,
offset->ssa,
.base = base,
.component = c,
.dest_type = nir_type_invalid | 32,
.io_semantics = io_semantics);
nir_intrinsic_instr *load =
nir_instr_as_intrinsic(input_comps[c]->parent_instr);
nir_instr_as_intrinsic(packed_comps[c]->parent_instr);
util_dynarray_append(&state->loads, nir_intrinsic_instr *, load);
}
nir_def *input = nir_vec(b, input_comps, ARRAY_SIZE(input_comps));
nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
nir_def *output = unpack_from_format(b, input, dest_type, format);
if (output->num_components > intr->def.num_components)
output = nir_trim_vector(b, output, intr->def.num_components);
return output;
return unpack_from_format(b,
packed_comps,
dest_type,
format,
intr->def.num_components);
}
/**
@ -1412,23 +960,23 @@ bool pco_nir_pvi(nir_shader *shader, pco_vs_data *vs)
DIV_ROUND_UP(util_format_get_blocksize(format), sizeof(uint32_t));
var->type = glsl_uvec_type(format_dwords);
nir_def *input_comps[4];
for (unsigned c = 0; c < ARRAY_SIZE(input_comps); ++c) {
input_comps[c] = nir_load_input(&b,
1,
32,
nir_imm_int(&b, 0),
.range = 1,
.component = c,
.dest_type = nir_type_invalid | 32,
.io_semantics = (nir_io_semantics){
.location = location,
.num_slots = 1,
});
nir_def *packed_comps[4];
for (unsigned c = 0; c < ARRAY_SIZE(packed_comps); ++c) {
packed_comps[c] = nir_load_input(&b,
1,
32,
nir_imm_int(&b, 0),
.range = 1,
.component = c,
.dest_type = nir_type_invalid | 32,
.io_semantics = (nir_io_semantics){
.location = location,
.num_slots = 1,
});
}
nir_def *input = nir_vec(&b, input_comps, ARRAY_SIZE(input_comps));
state.attribs[u] = unpack_from_format(&b, input, base_type, format);
state.attribs[u] =
unpack_from_format(&b, packed_comps, base_type, format, 4);
}
nir_shader_lower_instructions(shader, is_pvi, lower_pvi, &state);

View file

@ -1574,6 +1574,49 @@ static pco_instr *trans_reg_intr(trans_ctx *tctx,
UNREACHABLE("");
}
/**
 * \brief Picks the PCO pack format that corresponds to a pipe format.
 *
 * The choice is driven solely by the type and bit size of the channel reported
 * by util_format_get_largest_non_void_channel(); whether the format is
 * normalized is not examined here.
 *
 * Handled combinations:
 *  - unsigned:  8 -> U8888, 10 -> U1010102, 16 -> U1616
 *  - signed:    8 -> S8888, 10 -> S1010102, 16 -> S1616
 *  - float:    11 -> F111110, 16 -> F16F16
 *
 * Any other type/size pairing hits UNREACHABLE().
 *
 * \param[in] fmt Pipe format to translate.
 * \return The matching PCO pack format.
 */
static enum pco_pck_fmt pco_pck_format_from_pipe_format(enum pipe_format fmt)
{
   const struct util_format_description *fmt_desc = util_format_description(fmt);

   int largest = util_format_get_largest_non_void_channel(fmt);
   assert(largest >= 0);

   const struct util_format_channel_description *largest_chan =
      &fmt_desc->channel[largest];

   /* Dispatch on the channel type first, then on its width in bits. */
   switch (largest_chan->type) {
   case UTIL_FORMAT_TYPE_UNSIGNED:
      switch (largest_chan->size) {
      case 8:
         return PCO_PCK_FMT_U8888;

      case 10:
         return PCO_PCK_FMT_U1010102;

      case 16:
         return PCO_PCK_FMT_U1616;

      default:
         break;
      }
      break;

   case UTIL_FORMAT_TYPE_SIGNED:
      switch (largest_chan->size) {
      case 8:
         return PCO_PCK_FMT_S8888;

      case 10:
         return PCO_PCK_FMT_S1010102;

      case 16:
         return PCO_PCK_FMT_S1616;

      default:
         break;
      }
      break;

   case UTIL_FORMAT_TYPE_FLOAT:
      switch (largest_chan->size) {
      case 11:
         return PCO_PCK_FMT_F111110;

      case 16:
         return PCO_PCK_FMT_F16F16;

      default:
         break;
      }
      break;

   default:
      break;
   }

   UNREACHABLE("Unsupported format.");
}
/**
* \brief Translates a NIR intrinsic instruction into PCO.
*
@ -1885,6 +1928,34 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
.rpt = pco_ref_get_chans(dest));
break;
case nir_intrinsic_pack_pco: {
enum pipe_format fmt = nir_intrinsic_format(intr);
enum pco_pck_fmt pck_fmt = pco_pck_format_from_pipe_format(fmt);
bool scale = util_format_is_unorm(fmt) || util_format_is_snorm(fmt);
unsigned chans = pco_ref_get_chans(src[0]);
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = chans,
.pck_fmt = pck_fmt,
.scale = scale);
break;
}
case nir_intrinsic_unpack_pco: {
enum pipe_format fmt = nir_intrinsic_format(intr);
enum pco_pck_fmt pck_fmt = pco_pck_format_from_pipe_format(fmt);
unsigned chans = pco_ref_get_chans(dest);
bool scale = util_format_is_unorm(fmt) || util_format_is_snorm(fmt);
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = chans,
.pck_fmt = pck_fmt,
.scale = scale);
break;
}
default:
printf("Unsupported intrinsic: \"");
nir_print_instr(&intr->instr, stdout);
@ -2766,14 +2837,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
instr = trans_min_max(tctx, alu->op, dest, src[0], src[1]);
break;
case nir_op_pack_half_16:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_F16F16);
break;
case nir_op_pack_half_2x16:
instr = pco_pck(&tctx->b,
dest,
@ -2782,14 +2845,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.pck_fmt = PCO_PCK_FMT_F16F16);
break;
case nir_op_unpack_half_16:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_F16F16);
break;
case nir_op_unpack_half_2x16:
instr = pco_unpck(&tctx->b,
dest,
@ -2798,33 +2853,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.pck_fmt = PCO_PCK_FMT_F16F16);
break;
case nir_op_pack_snorm_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_pack_snorm_8_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 2,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_pack_snorm_8_8_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_pack_snorm_4x8:
instr = pco_pck(&tctx->b,
dest,
@ -2834,33 +2862,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_unpack_snorm_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_unpack_snorm_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 2,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_unpack_snorm_8_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_unpack_snorm_4x8:
instr = pco_unpck(&tctx->b,
dest,
@ -2870,65 +2871,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_unpack_sscaled_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S8888);
break;
case nir_op_unpack_sscaled_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 2,
.pck_fmt = PCO_PCK_FMT_S8888);
break;
case nir_op_unpack_sscaled_8_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_S8888);
break;
case nir_op_unpack_sscaled_8_8_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 4,
.pck_fmt = PCO_PCK_FMT_S8888);
break;
case nir_op_pack_unorm_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_pack_unorm_8_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 2,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_pack_unorm_8_8_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_pack_unorm_4x8:
instr = pco_pck(&tctx->b,
dest,
@ -2938,33 +2880,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_unpack_unorm_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_unpack_unorm_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 2,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_unpack_unorm_8_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_unpack_unorm_4x8:
instr = pco_unpck(&tctx->b,
dest,
@ -2974,137 +2889,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_pack_unorm_10_10_10_2:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 4,
.pck_fmt = PCO_PCK_FORMAT_U1010102,
.scale = true);
break;
case nir_op_unpack_unorm_10_10_10_2:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 4,
.pck_fmt = PCO_PCK_FMT_U1010102,
.scale = true);
break;
case nir_op_pack_snorm_10_10_10_2:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 4,
.pck_fmt = PCO_PCK_FORMAT_S1010102,
.scale = true);
break;
case nir_op_unpack_snorm_10_10_10_2:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 4,
.pck_fmt = PCO_PCK_FMT_S1010102,
.scale = true);
break;
case nir_op_pack_uscaled_10_10_10_2:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 4,
.pck_fmt = PCO_PCK_FORMAT_U1010102,
.scale = false);
break;
case nir_op_unpack_uscaled_10_10_10_2:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 4,
.pck_fmt = PCO_PCK_FMT_U1010102,
.scale = false);
break;
case nir_op_pack_sscaled_10_10_10_2:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 4,
.pck_fmt = PCO_PCK_FORMAT_S1010102,
.scale = false);
break;
case nir_op_unpack_sscaled_10_10_10_2:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 4,
.pck_fmt = PCO_PCK_FMT_S1010102,
.scale = false);
break;
case nir_op_pack_unorm_10:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_U1010102,
.scale = true);
break;
case nir_op_pack_snorm_10:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S1010102,
.scale = true);
break;
case nir_op_pack_float_10:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_2F10F10F10);
break;
case nir_op_pack_float_11:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_F111110);
break;
case nir_op_pack_float_11_11_10:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_F111110);
break;
case nir_op_unpack_float_11_11_10:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_F111110);
break;
case nir_op_pack_snorm_16:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S1616,
.scale = true);
break;
case nir_op_pack_snorm_2x16:
instr = pco_pck(&tctx->b,
dest,
@ -3114,15 +2898,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_unpack_snorm_16:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S1616,
.scale = true);
break;
case nir_op_unpack_snorm_2x16:
instr = pco_unpck(&tctx->b,
dest,
@ -3132,15 +2907,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_pack_unorm_16:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_U1616,
.scale = true);
break;
case nir_op_pack_unorm_2x16:
instr = pco_pck(&tctx->b,
dest,
@ -3150,15 +2916,6 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_unpack_unorm_16:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_U1616,
.scale = true);
break;
case nir_op_unpack_unorm_2x16:
instr = pco_unpck(&tctx->b,
dest,