diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
index ac52414b900..0d65981d7da 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -170,6 +170,16 @@ unpack_snorm_1x16(uint16_t u)
    return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
 }
 
+/**
+ * Evaluate component 3 of packUnorm3x10_1x2.
+ */
+static uint16_t
+pack_unorm_1x2(float x)
+{
+   return (uint16_t) (int)
+          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 3.0f);
+}
+
 /**
  * Evaluate one component packUnorm4x8.
  */
@@ -189,6 +199,16 @@ pack_unorm_1x8(float x)
           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
 }
 
+/**
+ * Evaluate component [0,2] of packUnorm3x10_1x2.
+ */
+static uint16_t
+pack_unorm_1x10(float x)
+{
+   return (uint16_t) (int)
+          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 1023.0f);
+}
+
 /**
  * Evaluate one component packUnorm2x16.
  */
@@ -208,6 +228,16 @@ pack_unorm_1x16(float x)
           _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
 }
 
+/**
+ * Evaluate component 3 of unpackUnorm3x10_1x2.
+ */
+static float
+unpack_unorm_1x2(uint16_t u)
+{
+   u &= 0x0003;
+   return (float) u / 3.0f;
+}
+
 /**
  * Evaluate one component of unpackUnorm4x8.
  */
@@ -226,6 +256,16 @@ unpack_unorm_1x8(uint8_t u)
    return (float) u / 255.0f;
 }
 
+/**
+ * Evaluate component [0,2] of unpackUnorm3x10_1x2.
+ */
+static float
+unpack_unorm_1x10(uint16_t u)
+{
+   u &= 0x03FF;
+   return (float) u / 1023.0f;
+}
+
 /**
  * Evaluate one component of unpackUnorm2x16.
  */
diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c
index 64d6e469cb9..a1ed894d7ac 100644
--- a/src/compiler/nir/nir_lower_alu_width.c
+++ b/src/compiler/nir/nir_lower_alu_width.c
@@ -278,6 +278,14 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
        */
      return NULL;
 
+   case nir_op_unpack_snorm_8_8:
+   case nir_op_unpack_unorm_8_8:
+   case nir_op_unpack_snorm_8_8_8:
+   case nir_op_unpack_unorm_8_8_8:
+   case nir_op_unpack_unorm_10_10_10_2:
+   case nir_op_unpack_float_11_11_10:
+      return NULL;
+
    case nir_op_unpack_half_2x16: {
       if (!b->shader->options->lower_unpack_half_2x16)
          return NULL;
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 9436b6983fa..2733a12325e 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1808,3 +1808,100 @@ unop_numeric_convert("f2e4m3fn_satfn", tuint8, tfloat32, "isinf(src0) ? 0x7f : _
 unop_numeric_convert("e5m22f", tfloat32, tuint8, "_mesa_e5m2_to_float(src0)")
 unop_numeric_convert("f2e5m2", tuint8, tfloat32, "_mesa_float_to_e5m2(src0)")
 unop_numeric_convert("f2e5m2_sat", tuint8, tfloat32, "_mesa_float_to_e5m2_sat(src0)")
+
+def pack_r(fmt, r):
+   unop_horiz(f"pack_{fmt}_{r}", 1, tuint32, 1, tfloat32, f"""
+dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
+""")
+
+def pack_rg(fmt, r, g):
+   unop_horiz(f"pack_{fmt}_{r}_{g}", 1, tuint32, 2, tfloat32, f"""
+dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
+dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r};
+""")
+
+def pack_rgb(fmt, r, g, b):
+   unop_horiz(f"pack_{fmt}_{r}_{g}_{b}", 1, tuint32, 3, tfloat32, f"""
+dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
+dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r};
+dst.x |= ((uint32_t) pack_{fmt}_1x{b}(src0.z)) << {r + g};
+""")
+
+def pack_rgba(fmt, r, g, b, a):
+   unop_horiz(f"pack_{fmt}_{r}_{g}_{b}_{a}", 1, tuint32, 4, tfloat32, f"""
+dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
+dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r};
+dst.x |= ((uint32_t) pack_{fmt}_1x{b}(src0.z)) << {r + g};
+dst.x |= ((uint32_t) pack_{fmt}_1x{a}(src0.w)) << {r + g + b};
+""")
+
+pack_r("snorm", 8)
+pack_r("unorm", 8)
+
+pack_rg("snorm", 8, 8)
+pack_rg("unorm", 8, 8)
+
+pack_rgb("snorm", 8, 8, 8)
+pack_rgb("unorm", 8, 8, 8)
+
+pack_r("snorm", 16)
+pack_r("unorm", 16)
+pack_r("half", 16)
+
+pack_rgba("unorm", 10, 10, 10, 2)
+
+unop_horiz("pack_float_11_11_10", 1, tuint32, 3, tfloat32, """
+dst.x = f32_to_uf11(src0.x) & 0x7ff;
+dst.x |= (f32_to_uf11(src0.y) & 0x7ff) << 11;
+dst.x |= (f32_to_uf10(src0.z) & 0x3ff) << 22;
+""")
+
+def unpack_r(fmt, r):
+   unop_horiz(f"unpack_{fmt}_{r}", 1, tfloat32, 1, tuint32, f"""
+dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
+""")
+
+def unpack_rg(fmt, r, g):
+   unop_horiz(f"unpack_{fmt}_{r}_{g}", 2, tfloat32, 1, tuint32, f"""
+dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
+dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1));
+""")
+
+def unpack_rgb(fmt, r, g, b):
+   unop_horiz(f"unpack_{fmt}_{r}_{g}_{b}", 3, tfloat32, 1, tuint32, f"""
+dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
+dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1));
+dst.z = unpack_{fmt}_1x{b}((src0.x >> {r + g}) & ((1u << {b}) - 1));
+""")
+
+def unpack_rgba(fmt, r, g, b, a):
+   unop_horiz(f"unpack_{fmt}_{r}_{g}_{b}_{a}", 4, tfloat32, 1, tuint32, f"""
+dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
+dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1));
+dst.z = unpack_{fmt}_1x{b}((src0.x >> {r + g}) & ((1u << {b}) - 1));
+dst.w = unpack_{fmt}_1x{a}((src0.x >> {r + g + b}) & ((1u << {a}) - 1));
+""")
+
+unpack_r("snorm", 8)
+unpack_r("unorm", 8)
+
+unpack_rg("snorm", 8, 8)
+unpack_rg("unorm", 8, 8)
+
+unpack_rgb("snorm", 8, 8, 8)
+unpack_rgb("unorm", 8, 8, 8)
+
+unpack_r("snorm", 16)
+unpack_r("unorm", 16)
+
+unop_horiz("unpack_half_16", 1, tfloat32, 1, tuint32, """
+dst.x = unpack_half_1x16(src0.x & ((1u << 16) - 1), nir_is_denorm_flush_to_zero(execution_mode, 16));
+""")
+
+unpack_rgba("unorm", 10, 10, 10, 2)
+
+unop_horiz("unpack_float_11_11_10", 3, tfloat32, 1, tuint32, """
+dst.x = uf11_to_f32(src0.x & 0x7ff);
+dst.y = uf11_to_f32((src0.x >> 11) & 0x7ff);
+dst.z = uf10_to_f32((src0.x >> 22) & 0x3ff);
+""")
diff --git a/src/imagination/pco/pco_nir_pvfio.c b/src/imagination/pco/pco_nir_pvfio.c
index d881b0c1bb0..eeac3ae79ae 100644
--- a/src/imagination/pco/pco_nir_pvfio.c
+++ b/src/imagination/pco/pco_nir_pvfio.c
@@ -14,6 +14,7 @@
 #include "compiler/shader_enums.h"
 #include "nir.h"
 #include "nir_builder.h"
+#include "nir_format_convert.h"
 #include "pco.h"
 #include "pco_builder.h"
 #include "pco_internal.h"
@@ -26,33 +27,17 @@
 
 /** Per-fragment output pass state. */
 struct pfo_state {
+   struct util_dynarray loads; /** List of fragment loads. */
    struct util_dynarray stores; /** List of fragment stores. */
 
    pco_fs_data *fs; /** Fragment-specific data. */
 };
 
-/**
- * \brief Returns a NIR intrinsic instruction if a NIR instruction matches the
- * provided intrinsic op.
- *
- * \param[in] instr NIR instruction.
- * \param[in] op Desired intrinsic op.
- * \return The intrinsic instruction, else NULL.
- */
-static inline nir_intrinsic_instr *is_intr(nir_instr *instr,
-                                           nir_intrinsic_op op)
-{
-   nir_intrinsic_instr *intr = NULL;
+/** Per-vertex input pass state. */
+struct pvi_state {
+   struct util_dynarray loads; /** List of vertex loads. */
 
-   if (instr->type != nir_instr_type_intrinsic)
-      return NULL;
-
-   intr = nir_instr_as_intrinsic(instr);
-
-   if (intr->intrinsic != op)
-      return NULL;
-
-   return intr;
-}
+   pco_vs_data *vs; /** Vertex-specific data. */
+};
 
 /**
  * \brief Returns the GLSL base type equivalent of a pipe format.
@@ -84,96 +69,823 @@ static inline enum glsl_base_type base_type_from_fmt(enum pipe_format format)
    return GLSL_TYPE_ERROR;
 }
 
+static enum pipe_format
+to_pbe_format(nir_builder *b, enum pipe_format format, nir_def **input)
+{
+   switch (format) {
+   case PIPE_FORMAT_B5G6R5_UNORM:
+      return PIPE_FORMAT_R8G8B8_UNORM;
+
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      return PIPE_FORMAT_R8G8B8A8_UNORM;
+
+   case PIPE_FORMAT_R8G8B8A8_SRGB:
+   case PIPE_FORMAT_B8G8R8A8_SRGB:
+      if (input)
+         *input = nir_fsat(b, *input);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R10G10B10A2_UNORM:
+      return PIPE_FORMAT_R16G16B16A16_FLOAT;
+
+   case PIPE_FORMAT_R11G11B10_FLOAT:
+      return PIPE_FORMAT_R16G16B16_FLOAT;
+
+   /* For loadops. */
+   case PIPE_FORMAT_Z32_FLOAT:
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      assert(b->shader->info.internal);
+      return PIPE_FORMAT_R32_FLOAT;
+
+   default:
+      break;
+   }
+
+   return format;
+}
+
+static nir_def *pack_to_format(nir_builder *b,
+                               nir_def *input,
+                               nir_alu_type src_type,
+                               enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   src_type = nir_alu_type_get_base_type(src_type);
+
+   nir_def *input_comps[4];
+   for (unsigned u = 0; u < desc->nr_channels; ++u) {
+      enum pipe_swizzle s = desc->swizzle[u];
+      if (s <= PIPE_SWIZZLE_W) {
+         input_comps[u] = nir_channel(b, input, s);
+      } else if (s == PIPE_SWIZZLE_0) {
+         input_comps[u] = nir_imm_int(b, 0);
+      } else if (s == PIPE_SWIZZLE_1) {
+         input_comps[u] = src_type == nir_type_float ? nir_imm_float(b, 1.0f)
+                                                     : nir_imm_int(b, 1);
+      } else {
+         UNREACHABLE("");
+      }
+   }
+
+   input = nir_vec(b, input_comps, desc->nr_channels);
+
+   nir_def *zero = nir_imm_int(b, 0);
+   nir_def *packed[4] = { zero, zero, zero, zero };
+   switch (format) {
+   case PIPE_FORMAT_R8_UNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_unorm_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8_UNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_unorm_8_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8_UNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_unorm_8_8_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_unorm_4x8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8_SNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_snorm_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8_SNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_snorm_8_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8_SNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_snorm_8_8_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8A8_SNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_snorm_4x8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8A8_UINT:
+   case PIPE_FORMAT_R8G8B8A8_SINT:
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[3], 24, 8);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R8G8B8_UINT:
+   case PIPE_FORMAT_R8G8B8_SINT:
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[2], 16, 8);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R8G8_UINT:
+   case PIPE_FORMAT_R8G8_SINT:
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[1], 8, 8);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R8_UINT:
+   case PIPE_FORMAT_R8_SINT:
+      assert(src_type != nir_type_float);
+      /* TODO: sat/clamp? */
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[0], 0, 8);
+      break;
+
+   case PIPE_FORMAT_R10G10B10A2_UINT:
+      assert(src_type == nir_type_uint);
+      /* TODO: sat/clamp? */
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[0], 0, 10);
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[1], 10, 10);
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[2], 20, 10);
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[3], 30, 2);
+      break;
+
+   case PIPE_FORMAT_R11G11B10_FLOAT:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_float_11_11_10(b, input);
+      break;
+
+   case PIPE_FORMAT_R16_UNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_unorm_16(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R16G16_UNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_unorm_2x16(b, input);
+      break;
+
+   case PIPE_FORMAT_R16G16B16_UNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_unorm_2x16(b, nir_channels(b, input, 0b0011));
+      packed[1] = nir_pack_unorm_16(b, input_comps[2]);
+      break;
+
+   case PIPE_FORMAT_R16G16B16A16_UNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_unorm_2x16(b, nir_channels(b, input, 0b0011));
+      packed[1] = nir_pack_unorm_2x16(b, nir_channels(b, input, 0b1100));
+      break;
+
+   case PIPE_FORMAT_R16_SNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_snorm_16(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R16G16_SNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_snorm_2x16(b, input);
+      break;
+
+   case PIPE_FORMAT_R16G16B16_SNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_snorm_2x16(b, nir_channels(b, input, 0b0011));
+      packed[1] = nir_pack_snorm_16(b, input_comps[2]);
+      break;
+
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_snorm_2x16(b, nir_channels(b, input, 0b0011));
+      packed[1] = nir_pack_snorm_2x16(b, nir_channels(b, input, 0b1100));
+      break;
+
+   case PIPE_FORMAT_R16G16B16A16_UINT:
+   case PIPE_FORMAT_R16G16B16A16_SINT:
+      packed[1] = nir_bitfield_insert_imm(b, packed[1], input_comps[3], 16, 16);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R16G16B16_UINT:
+   case PIPE_FORMAT_R16G16B16_SINT:
+      packed[1] = nir_bitfield_insert_imm(b, packed[1], input_comps[2], 0, 16);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R16G16_UINT:
+   case PIPE_FORMAT_R16G16_SINT:
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[1], 16, 16);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R16_UINT:
+   case PIPE_FORMAT_R16_SINT:
+      assert(src_type != nir_type_float);
+      /* TODO: sat/clamp? */
+      packed[0] = nir_bitfield_insert_imm(b, packed[0], input_comps[0], 0, 16);
+      break;
+
+   case PIPE_FORMAT_R16_FLOAT:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_half_16(b, input);
+      break;
+
+   case PIPE_FORMAT_R16G16_FLOAT:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_half_2x16(b, input);
+      break;
+
+   case PIPE_FORMAT_R16G16B16_FLOAT:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_half_2x16(b, nir_channels(b, input, 0b0011));
+      packed[1] = nir_pack_half_16(b, input_comps[2]);
+      break;
+
+   case PIPE_FORMAT_R16G16B16A16_FLOAT:
+      assert(src_type == nir_type_float);
+      packed[0] = nir_pack_half_2x16(b, nir_channels(b, input, 0b0011));
+      packed[1] = nir_pack_half_2x16(b, nir_channels(b, input, 0b1100));
+      break;
+
+   case PIPE_FORMAT_R32G32B32A32_UINT:
+   case PIPE_FORMAT_R32G32B32A32_SINT:
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      packed[3] = input_comps[3];
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R32G32B32_UINT:
+   case PIPE_FORMAT_R32G32B32_SINT:
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      packed[2] = input_comps[2];
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R32G32_UINT:
+   case PIPE_FORMAT_R32G32_SINT:
+   case PIPE_FORMAT_R32G32_FLOAT:
+      packed[1] = input_comps[1];
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R32_UINT:
+   case PIPE_FORMAT_R32_SINT:
+   case PIPE_FORMAT_R32_FLOAT:
+      packed[0] = input_comps[0];
+      break;
+
+   default:
+      printf("Unsupported pack format %s.\n", util_format_name(format));
+      UNREACHABLE("");
+   }
+
+   unsigned packed_comps = 1;
+
+   if (packed[3] != zero)
+      packed_comps = 4;
+   else if (packed[2] != zero)
+      packed_comps = 3;
+   else if (packed[1] != zero)
+      packed_comps = 2;
+
+   assert(packed[0] != zero);
+
+   return nir_vec(b, packed, packed_comps);
+}
+
+static nir_def *unpack_from_format(nir_builder *b,
+                                   nir_def *input,
+                                   nir_alu_type dest_type,
+                                   enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   dest_type = nir_alu_type_get_base_type(dest_type);
+
+   nir_def *input_comps[4] = {
+      nir_channel(b, input, 0),
+      nir_channel(b, input, 1),
+      nir_channel(b, input, 2),
+      nir_channel(b, input, 3),
+   };
+
+   nir_def *unpacked = nir_undef(b, 4, 32);
+   switch (format) {
+   case PIPE_FORMAT_R8_UNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_unorm_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8_UNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_unorm_8_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8_UNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_unorm_8_8_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_unorm_4x8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8_SNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_snorm_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8_SNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_snorm_8_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8_SNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_snorm_8_8_8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8A8_SNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_snorm_4x8(b, input);
+      break;
+
+   case PIPE_FORMAT_R8G8B8A8_UINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ubitfield_extract_imm(b, input_comps[0], 24, 8),
+         3);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R8G8B8_UINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ubitfield_extract_imm(b, input_comps[0], 16, 8),
+         2);
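+      /* As in pack_to_format() above, the integer cases cascade: each wider
+       * format extracts its highest component and then falls through to the
+       * next-narrower case, down to the single-component case, which asserts
+       * the base type and breaks.
+       */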
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R8G8_UINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ubitfield_extract_imm(b, input_comps[0], 8, 8),
+         1);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R8_UINT:
+      assert(dest_type == nir_type_uint);
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ubitfield_extract_imm(b, input_comps[0], 0, 8),
+         0);
+      break;
+
+   case PIPE_FORMAT_R8G8B8A8_SINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ibitfield_extract_imm(b, input_comps[0], 24, 8),
+         3);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R8G8B8_SINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ibitfield_extract_imm(b, input_comps[0], 16, 8),
+         2);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R8G8_SINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ibitfield_extract_imm(b, input_comps[0], 8, 8),
+         1);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R8_SINT:
+      assert(dest_type == nir_type_int);
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ibitfield_extract_imm(b, input_comps[0], 0, 8),
+         0);
+      break;
+
+   case PIPE_FORMAT_R10G10B10A2_UNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_unorm_10_10_10_2(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R10G10B10A2_UINT:
+      assert(dest_type == nir_type_uint);
+      unpacked = nir_vec4(b,
+                          nir_ubitfield_extract_imm(b, input_comps[0], 0, 10),
+                          nir_ubitfield_extract_imm(b, input_comps[0], 10, 10),
+                          nir_ubitfield_extract_imm(b, input_comps[0], 20, 10),
+                          nir_ubitfield_extract_imm(b, input_comps[0], 30, 2));
+      break;
+
+   case PIPE_FORMAT_R11G11B10_FLOAT:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_float_11_11_10(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R16_UNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_unorm_16(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R16G16_UNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_unorm_2x16(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R16G16B16_UNORM: {
+      assert(dest_type == nir_type_float);
+      nir_def *lo2 = nir_unpack_unorm_2x16(b, input_comps[0]);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
+      unpacked = nir_vector_insert_imm(b,
+                                       unpacked,
+                                       nir_unpack_unorm_16(b, input_comps[1]),
+                                       2);
+      break;
+   }
+
+   case PIPE_FORMAT_R16G16B16A16_UNORM: {
+      assert(dest_type == nir_type_float);
+      nir_def *lo2 = nir_unpack_unorm_2x16(b, input_comps[0]);
+      nir_def *hi2 = nir_unpack_unorm_2x16(b, input_comps[1]);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 0), 2);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 1), 3);
+      break;
+   }
+
+   case PIPE_FORMAT_R16_SNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_snorm_16(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R16G16_SNORM:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_snorm_2x16(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R16G16B16_SNORM: {
+      assert(dest_type == nir_type_float);
+      nir_def *lo2 = nir_unpack_snorm_2x16(b, input_comps[0]);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
+      unpacked = nir_vector_insert_imm(b,
+                                       unpacked,
+                                       nir_unpack_snorm_16(b, input_comps[1]),
+                                       2);
+      break;
+   }
+
+   case PIPE_FORMAT_R16G16B16A16_SNORM: {
+      assert(dest_type == nir_type_float);
+      nir_def *lo2 = nir_unpack_snorm_2x16(b, input_comps[0]);
+      nir_def *hi2 = nir_unpack_snorm_2x16(b, input_comps[1]);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 0), 2);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 1), 3);
+      break;
+   }
+
+   case PIPE_FORMAT_R16G16B16A16_UINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ubitfield_extract_imm(b, input_comps[1], 16, 16),
+         3);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R16G16B16_UINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ubitfield_extract_imm(b, input_comps[1], 0, 16),
+         2);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R16G16_UINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ubitfield_extract_imm(b, input_comps[0], 16, 16),
+         1);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R16_UINT:
+      assert(dest_type == nir_type_uint);
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ubitfield_extract_imm(b, input_comps[0], 0, 16),
+         0);
+      break;
+
+   case PIPE_FORMAT_R16G16B16A16_SINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ibitfield_extract_imm(b, input_comps[1], 16, 16),
+         3);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R16G16B16_SINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ibitfield_extract_imm(b, input_comps[1], 0, 16),
+         2);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R16G16_SINT:
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ibitfield_extract_imm(b, input_comps[0], 16, 16),
+         1);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R16_SINT:
+      assert(dest_type == nir_type_int);
+      unpacked = nir_vector_insert_imm(
+         b,
+         unpacked,
+         nir_ibitfield_extract_imm(b, input_comps[0], 0, 16),
+         0);
+      break;
+
+   case PIPE_FORMAT_R16_FLOAT:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_half_16(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R16G16_FLOAT:
+      assert(dest_type == nir_type_float);
+      unpacked = nir_unpack_half_2x16(b, input_comps[0]);
+      break;
+
+   case PIPE_FORMAT_R16G16B16_FLOAT: {
+      assert(dest_type == nir_type_float);
+      nir_def *lo2 = nir_unpack_half_2x16(b, input_comps[0]);
+
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
+      unpacked = nir_vector_insert_imm(b,
+                                       unpacked,
+                                       nir_unpack_half_16(b, input_comps[1]),
+                                       2);
+      break;
+   }
+
+   case PIPE_FORMAT_R16G16B16A16_FLOAT: {
+      assert(dest_type == nir_type_float);
+      nir_def *lo2 = nir_unpack_half_2x16(b, input_comps[0]);
+      nir_def *hi2 = nir_unpack_half_2x16(b, input_comps[1]);
+
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 0), 0);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, lo2, 1), 1);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 0), 2);
+      unpacked = nir_vector_insert_imm(b, unpacked, nir_channel(b, hi2, 1), 3);
+
+      break;
+   }
+
+   case PIPE_FORMAT_R32G32B32A32_UINT:
+   case PIPE_FORMAT_R32G32B32A32_SINT:
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      unpacked = nir_vector_insert_imm(b, unpacked, input_comps[3], 3);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R32G32B32_UINT:
+   case PIPE_FORMAT_R32G32B32_SINT:
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      unpacked = nir_vector_insert_imm(b, unpacked, input_comps[2], 2);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R32G32_UINT:
+   case PIPE_FORMAT_R32G32_SINT:
+   case PIPE_FORMAT_R32G32_FLOAT:
+      unpacked = nir_vector_insert_imm(b, unpacked, input_comps[1], 1);
+      FALLTHROUGH;
+
+   case PIPE_FORMAT_R32_UINT:
+   case PIPE_FORMAT_R32_SINT:
+   case PIPE_FORMAT_R32_FLOAT:
+      unpacked = nir_vector_insert_imm(b, unpacked, input_comps[0], 0);
+      break;
+
+   default:
+      printf("Unsupported unpack format %s.\n", util_format_name(format));
+      UNREACHABLE("");
+   }
+
+   nir_def *output_comps[4];
+   for (unsigned u = 0; u < ARRAY_SIZE(output_comps); ++u) {
+      enum pipe_swizzle s = desc->swizzle[u];
+      if (s <= PIPE_SWIZZLE_W) {
+         output_comps[u] = nir_channel(b, unpacked, s);
+      } else if (s == PIPE_SWIZZLE_0) {
+         output_comps[u] = nir_imm_int(b, 0);
+      } else if (s == PIPE_SWIZZLE_1) {
+         output_comps[u] = dest_type == nir_type_float ? nir_imm_float(b, 1.0f)
+                                                       : nir_imm_int(b, 1);
+      } else {
+         UNREACHABLE("");
+      }
+   }
+
+   return nir_vec(b, output_comps, ARRAY_SIZE(output_comps));
+}
+
+static inline bool is_processed(nir_intrinsic_instr *intr)
+{
+   nir_alu_type type;
+
+   if (nir_intrinsic_has_src_type(intr))
+      type = nir_intrinsic_src_type(intr);
+   else if (nir_intrinsic_has_dest_type(intr))
+      type = nir_intrinsic_dest_type(intr);
+   else
+      return true;
+
+   return nir_alu_type_get_base_type(type) == nir_type_invalid;
+}
+
+static nir_def *lower_pfo_store(nir_builder *b,
+                                nir_intrinsic_instr *intr,
+                                struct pfo_state *state)
+{
+   /* Skip stores we've already processed. */
+   if (is_processed(intr)) {
+      util_dynarray_append(&state->stores, nir_intrinsic_instr *, intr);
+      return NULL;
+   }
+
+   nir_def *input = intr->src[0].ssa;
+   nir_src *offset = &intr->src[1];
+   assert(nir_src_as_uint(*offset) == 0);
+
+   ASSERTED unsigned bit_size = input->bit_size;
+   assert(bit_size == 32);
+
+   unsigned component = nir_intrinsic_component(intr);
+   assert(!component);
+
+   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
+   gl_frag_result location = io_semantics.location;
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   enum pipe_format format = state->fs->output_formats[location];
+   format = to_pbe_format(b, format, &input);
+
+   nir_alu_type src_type = nir_intrinsic_src_type(intr);
+   nir_def *output = pack_to_format(b, input, src_type, format);
+
+   /* Emit and track the new store. */
+   nir_intrinsic_instr *store =
+      nir_store_output(b,
+                       output,
+                       offset->ssa,
+                       .base = nir_intrinsic_base(intr),
+                       .write_mask = BITFIELD_MASK(output->num_components),
+                       .src_type = nir_type_invalid | 32,
+                       .component = component,
+                       .io_semantics = io_semantics,
+                       .io_xfb = nir_intrinsic_io_xfb(intr),
+                       .io_xfb2 = nir_intrinsic_io_xfb2(intr));
+
+   util_dynarray_append(&state->stores, nir_intrinsic_instr *, store);
+
+   /* Update the type of the stored variable. */
+   nir_variable *var =
+      nir_find_variable_with_location(b->shader, nir_var_shader_out, location);
+   assert(var);
+   var->type = glsl_uvec_type(output->num_components);
+
+   return NIR_LOWER_INSTR_PROGRESS_REPLACE;
+}
+
+static nir_def *lower_pfo_load(nir_builder *b,
+                               nir_intrinsic_instr *intr,
+                               struct pfo_state *state)
+{
+   /* Skip loads we've already processed. */
+   if (is_processed(intr)) {
+      util_dynarray_append(&state->loads, nir_intrinsic_instr *, intr);
+      return NULL;
+   }
+
+   unsigned base = nir_intrinsic_base(intr);
+
+   nir_src *offset = &intr->src[0];
+   assert(nir_src_as_uint(*offset) == 0);
+
+   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
+   gl_frag_result location = io_semantics.location;
+
+   enum pipe_format format = state->fs->output_formats[location];
+
+   format = to_pbe_format(b, format, NULL);
+
+   nir_def *input_comps[4];
+   for (unsigned c = 0; c < ARRAY_SIZE(input_comps); ++c) {
+      input_comps[c] = nir_load_output(b,
+                                       1,
+                                       32,
+                                       offset->ssa,
+                                       .base = base,
+                                       .component = c,
+                                       .dest_type = nir_type_invalid | 32,
+                                       .io_semantics = io_semantics);
+
+      nir_intrinsic_instr *load =
+         nir_instr_as_intrinsic(input_comps[c]->parent_instr);
+
+      util_dynarray_append(&state->loads, nir_intrinsic_instr *, load);
+   }
+
+   nir_def *input = nir_vec(b, input_comps, ARRAY_SIZE(input_comps));
+   nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
+   nir_def *output = unpack_from_format(b, input, dest_type, format);
+   if (output->num_components > intr->def.num_components)
+      output = nir_trim_vector(b, output, intr->def.num_components);
+
+   return output;
+}
+
+/**
+ * \brief Filters PFO-related instructions.
+ *
+ * \param[in] instr NIR instruction.
+ * \param[in] cb_data User callback data.
+ * \return True if the instruction matches the filter.
+ */
+static bool is_pfo(const nir_instr *instr, UNUSED const void *cb_data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   switch (intr->intrinsic) {
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_load_output:
+      return true;
+
+   default:
+      break;
+   }
+
+   return false;
+}
+
 /**
  * \brief Lowers a PFO-related instruction.
  *
  * \param[in] b NIR builder.
  * \param[in] instr NIR instruction.
  * \param[in] cb_data User callback data.
- * \return True if the instruction was lowered.
+ * \return The replacement/lowered def.
  */
-static bool lower_pfo(nir_builder *b, nir_instr *instr, void *cb_data)
+static nir_def *lower_pfo(nir_builder *b, nir_instr *instr, void *cb_data)
 {
    struct pfo_state *state = cb_data;
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 
-   /* TODO NEXT: move into separate function (pack_to_pbe),
-    * and use data from driver to actually figure out format stuff!
-    */
-   nir_intrinsic_instr *intr;
-   if ((intr = is_intr(instr, nir_intrinsic_store_output))) {
-      /* Skip stores we've already processed. */
-      util_dynarray_foreach (&state->stores, nir_intrinsic_instr *, store) {
-         if (intr == *store)
-            return false;
-      }
+   switch (intr->intrinsic) {
+   case nir_intrinsic_store_output:
+      return lower_pfo_store(b, intr, state);
 
-      nir_src *value = &intr->src[0];
-      nir_src *offset = &intr->src[1];
+   case nir_intrinsic_load_output:
+      return lower_pfo_load(b, intr, state);
 
-      /* TODO: more accurate way of detecting this */
-      /* Already in expected format. */
-      if (b->shader->info.internal && nir_src_num_components(*value) == 1) {
-         util_dynarray_append(&state->stores, nir_intrinsic_instr *, intr);
-         return false;
-      }
-
-      assert(nir_src_as_uint(*offset) == 0);
-
-      assert(nir_src_num_components(*value) == 4);
-      assert(nir_src_bit_size(*value) == 32);
-
-      struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
-      gl_frag_result location = io_semantics.location;
-
-      enum pipe_format format = state->fs->output_formats[location];
-
-      unsigned format_bits = util_format_get_blocksizebits(format);
-      assert(!(format_bits % 32));
-
-      /* Update the type of the stored variable. */
-      nir_variable *var = nir_find_variable_with_location(b->shader,
-                                                          nir_var_shader_out,
-                                                          location);
-      assert(var);
-
-      var->type = glsl_simple_explicit_type(base_type_from_fmt(format),
-                                            format_bits / 32,
-                                            1,
-                                            0,
-                                            false,
-                                            0);
-
-      b->cursor = nir_after_block(
-         nir_impl_last_block(nir_shader_get_entrypoint(b->shader)));
-
-      /* Emit and track the new store. */
-      /* TODO: support other formats. */
-      if (format == PIPE_FORMAT_R8G8B8A8_UNORM) {
-         nir_intrinsic_instr *store =
-            nir_store_output(b,
-                             nir_pack_unorm_4x8(b, value->ssa),
-                             offset->ssa,
-                             .base = nir_intrinsic_base(intr),
-                             .write_mask = 1,
-                             .component = 0,
-                             .src_type = nir_type_uint32,
-                             .io_semantics = io_semantics,
-                             .io_xfb = nir_intrinsic_io_xfb(intr),
-                             .io_xfb2 = nir_intrinsic_io_xfb2(intr));
-         util_dynarray_append(&state->stores, nir_intrinsic_instr *, store);
-      } else {
-         UNREACHABLE("");
-      }
-
-      /* Remove the old store. */
-      b->cursor = nir_instr_remove(instr);
-
-      return true;
+   default:
+      break;
    }
 
-   return false;
+   return NULL;
 }
 
+static bool sink_outputs(nir_shader *shader, struct pfo_state *state)
+{
+   bool progress = false;
+
+   nir_instr *after_instr = nir_block_last_instr(
+      nir_impl_last_block(nir_shader_get_entrypoint(shader)));
+
+   util_dynarray_foreach (&state->stores, nir_intrinsic_instr *, store) {
+      nir_instr *instr = &(*store)->instr;
+
+      progress |= nir_instr_move(nir_after_instr(after_instr), instr);
+      after_instr = instr;
+   }
+
+   return progress;
+}
+
 /**
  * \brief Per-fragment output pass.
  *
@@ -186,18 +898,93 @@ bool pco_nir_pfo(nir_shader *shader, pco_fs_data *fs)
    assert(shader->info.stage == MESA_SHADER_FRAGMENT);
 
    struct pfo_state state = { .fs = fs };
+   util_dynarray_init(&state.loads, NULL);
    util_dynarray_init(&state.stores, NULL);
 
-   bool progress = nir_shader_instructions_pass(shader,
-                                                lower_pfo,
-                                                nir_metadata_none,
-                                                &state);
+   bool progress =
+      nir_shader_lower_instructions(shader, is_pfo, lower_pfo, &state);
+
+   progress |= sink_outputs(shader, &state);
 
    util_dynarray_fini(&state.stores);
+   util_dynarray_fini(&state.loads);
 
    return progress;
 }
 
+static nir_def *lower_pvi(nir_builder *b, nir_instr *instr, void *cb_data)
+{
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   struct pvi_state *state = cb_data;
+
+   /* Skip loads we've already processed. */
+   if (is_processed(intr)) {
+      util_dynarray_append(&state->loads, nir_intrinsic_instr *, intr);
+      return NULL;
+   }
+
+   nir_src *offset = &intr->src[0];
+   assert(nir_src_as_uint(*offset) == 0);
+
+   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
+   gl_vert_attrib location = io_semantics.location;
+
+   b->cursor = nir_before_instr(&intr->instr);
+
+   enum pipe_format format = state->vs->attrib_formats[location];
+
+   nir_def *input_comps[4];
+   for (unsigned c = 0; c < ARRAY_SIZE(input_comps); ++c) {
+      input_comps[c] = nir_load_input(b,
+                                      1,
+                                      32,
+                                      offset->ssa,
+                                      .base = nir_intrinsic_base(intr),
+                                      .component = c,
+                                      .dest_type = nir_type_invalid | 32,
+                                      .io_semantics = io_semantics);
+
+      nir_intrinsic_instr *load =
+         nir_instr_as_intrinsic(input_comps[c]->parent_instr);
+
+      util_dynarray_append(&state->loads, nir_intrinsic_instr *, load);
+   }
+
+   nir_def *input = nir_vec(b, input_comps, ARRAY_SIZE(input_comps));
+   nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
+   nir_def *output = unpack_from_format(b, input, dest_type, format);
+   if (output->num_components > intr->def.num_components)
+      output = nir_trim_vector(b, output, intr->def.num_components);
+
+   /* Update the type of the input variable. */
+   nir_variable *var =
+      nir_find_variable_with_location(b->shader, nir_var_shader_in, location);
+   assert(var);
+
+   unsigned format_dwords =
+      DIV_ROUND_UP(util_format_get_blocksize(format), sizeof(uint32_t));
+
+   var->type = glsl_uvec_type(format_dwords);
+
+   return output;
+}
+
+static bool is_pvi(const nir_instr *instr, UNUSED const void *cb_data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_load_input)
+      return false;
+
+   gl_vert_attrib location = nir_intrinsic_io_semantics(intr).location;
+   assert(location >= VERT_ATTRIB_GENERIC0 &&
+          location <= VERT_ATTRIB_GENERIC15);
+
+   return true;
+}
+
 /**
  * \brief Per-vertex input pass.
  *
@@ -209,11 +996,18 @@ bool pco_nir_pvi(nir_shader *shader, pco_vs_data *vs)
 {
    assert(shader->info.stage == MESA_SHADER_VERTEX);
 
-   puts("finishme: pco_nir_pvi");
-
-   /* TODO: format conversion and inserting unspecified/missing components. */
-   return false;
+   struct pvi_state state = { .vs = vs };
+
+   util_dynarray_init(&state.loads, NULL);
+
+   bool progress =
+      nir_shader_lower_instructions(shader, is_pvi, lower_pvi, &state);
+
+   util_dynarray_fini(&state.loads);
+
+   return progress;
 }
 
 /**
diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c
index a4b5731ebe2..fc6b2a4ce55 100644
--- a/src/imagination/pco/pco_trans_nir.c
+++ b/src/imagination/pco/pco_trans_nir.c
@@ -189,9 +189,6 @@ trans_load_input_vs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
    ASSERTED unsigned base = nir_intrinsic_base(intr);
    assert(!base);
 
-   ASSERTED nir_alu_type type = nir_intrinsic_dest_type(intr);
-   assert(type == nir_type_float32 || type == nir_type_uint32 ||
-          type == nir_type_int32); /* TODO: f16 support. */
 
    ASSERTED const nir_src offset = intr->src[0];
@@ -470,6 +467,37 @@ static unsigned fetch_resource_base_reg_packed(const pco_common_data *common,
    return fetch_resource_base_reg(common, desc_set, binding, elem, is_img_smp);
 }
 
+/**
+ * \brief Translates a NIR fs load_output intrinsic into PCO.
+ *
+ * \param[in,out] tctx Translation context.
+ * \param[in] intr load_output intrinsic.
+ * \param[in] dest Load destination.
+ * \return The translated PCO instruction.
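+ *
+ * \note Only outputs that live in on-chip pixel output registers are handled
+ *       here (tile buffers are still TODO); the move is emitted with the
+ *       overlap check (.olchk) enabled.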
+ */
+static pco_instr *
+trans_load_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
+{
+   ASSERTED unsigned base = nir_intrinsic_base(intr);
+   assert(!base);
+
+   unsigned component = nir_intrinsic_component(intr);
+
+   ASSERTED const nir_src offset = intr->src[0];
+   assert(nir_src_as_uint(offset) == 0);
+
+   gl_frag_result location = nir_intrinsic_io_semantics(intr).location;
+
+   const pco_range *range = &tctx->shader->data.fs.outputs[location];
+   assert(component < range->count);
+
+   ASSERTED bool output_reg = tctx->shader->data.fs.output_reg[location];
+   assert(output_reg);
+   /* TODO: tile buffer support. */
+
+   pco_ref src = pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
+   return pco_mov(&tctx->b, dest, src, .olchk = true);
+}
+
 static pco_instr *trans_load_common_store(trans_ctx *tctx,
                                           nir_intrinsic_instr *intr,
                                           pco_ref dest,
@@ -1133,6 +1161,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
       UNREACHABLE("Unsupported stage for \"nir_intrinsic_store_output\".");
       break;
 
+   case nir_intrinsic_load_output:
+      assert(tctx->stage == MESA_SHADER_FRAGMENT);
+      instr = trans_load_output_fs(tctx, intr, dest);
+      break;
+
    case nir_intrinsic_load_push_constant:
       instr = trans_load_common_store(tctx,
@@ -2112,6 +2145,14 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
       instr = trans_min_max(tctx, alu->op, dest, src[0], src[1]);
       break;
 
+   case nir_op_pack_half_16:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 1,
+                      .pck_fmt = PCO_PCK_FMT_F16F16);
+      break;
+
    case nir_op_pack_half_2x16:
       instr = pco_pck(&tctx->b,
                       dest,
@@ -2120,6 +2161,14 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                       .pck_fmt = PCO_PCK_FMT_F16F16);
       break;
 
+   case nir_op_unpack_half_16:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 1,
+                        .pck_fmt = PCO_PCK_FMT_F16F16);
+      break;
+
    case nir_op_unpack_half_2x16:
       instr = pco_unpck(&tctx->b,
                         dest,
@@ -2128,6 +2177,33 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                         .pck_fmt = PCO_PCK_FMT_F16F16);
       break;
 
+   case nir_op_pack_snorm_8:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 1,
+                      .pck_fmt = PCO_PCK_FMT_S8888,
+                      .scale = true);
+      break;
+
+   case nir_op_pack_snorm_8_8:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 2,
+                      .pck_fmt = PCO_PCK_FMT_S8888,
+                      .scale = true);
+      break;
+
+   case nir_op_pack_snorm_8_8_8:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 3,
+                      .pck_fmt = PCO_PCK_FMT_S8888,
+                      .scale = true);
+      break;
+
    case nir_op_pack_snorm_4x8:
       instr = pco_pck(&tctx->b,
                       dest,
@@ -2137,6 +2213,33 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                       .scale = true);
       break;
 
+   case nir_op_unpack_snorm_8:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 1,
+                        .pck_fmt = PCO_PCK_FMT_S8888,
+                        .scale = true);
+      break;
+
+   case nir_op_unpack_snorm_8_8:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 2,
+                        .pck_fmt = PCO_PCK_FMT_S8888,
+                        .scale = true);
+      break;
+
+   case nir_op_unpack_snorm_8_8_8:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 3,
+                        .pck_fmt = PCO_PCK_FMT_S8888,
+                        .scale = true);
+      break;
+
    case nir_op_unpack_snorm_4x8:
       instr = pco_unpck(&tctx->b,
                         dest,
@@ -2146,6 +2249,33 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                         .scale = true);
       break;
 
+   case nir_op_pack_unorm_8:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 1,
+                      .pck_fmt = PCO_PCK_FMT_U8888,
+                      .scale = true);
+      break;
+
+   case nir_op_pack_unorm_8_8:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 2,
+                      .pck_fmt = PCO_PCK_FMT_U8888,
+                      .scale = true);
+      break;
+
+   case nir_op_pack_unorm_8_8_8:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 3,
+                      .pck_fmt = PCO_PCK_FMT_U8888,
+                      .scale = true);
+      break;
+
    case nir_op_pack_unorm_4x8:
       instr = pco_pck(&tctx->b,
                       dest,
@@ -2155,6 +2285,33 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                       .scale = true);
       break;
 
+   case nir_op_unpack_unorm_8:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 1,
+                        .pck_fmt = PCO_PCK_FMT_U8888,
+                        .scale = true);
+      break;
+
+   case nir_op_unpack_unorm_8_8:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 2,
+                        .pck_fmt = PCO_PCK_FMT_U8888,
+                        .scale = true);
+      break;
+
+   case nir_op_unpack_unorm_8_8_8:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 3,
+                        .pck_fmt = PCO_PCK_FMT_U8888,
+                        .scale = true);
+      break;
+
    case nir_op_unpack_unorm_4x8:
       instr = pco_unpck(&tctx->b,
                         dest,
@@ -2164,6 +2321,49 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                         .scale = true);
       break;
 
+   case nir_op_pack_unorm_10_10_10_2:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 4,
+                      .pck_fmt = PCO_PCK_FMT_U1010102,
+                      .scale = true);
+      break;
+
+   case nir_op_unpack_unorm_10_10_10_2:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 4,
+                        .pck_fmt = PCO_PCK_FMT_U1010102,
+                        .scale = true);
+      break;
+
+   case nir_op_pack_float_11_11_10:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 3,
+                      .pck_fmt = PCO_PCK_FMT_F111110);
+      break;
+
+   case nir_op_unpack_float_11_11_10:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 3,
+                        .pck_fmt = PCO_PCK_FMT_F111110);
+      break;
+
+   case nir_op_pack_snorm_16:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 1,
+                      .pck_fmt = PCO_PCK_FMT_S1616,
+                      .scale = true);
+      break;
+
    case nir_op_pack_snorm_2x16:
       instr = pco_pck(&tctx->b,
                       dest,
@@ -2173,6 +2373,15 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                       .scale = true);
       break;
 
+   case nir_op_unpack_snorm_16:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 1,
+                        .pck_fmt = PCO_PCK_FMT_S1616,
+                        .scale = true);
+      break;
+
    case nir_op_unpack_snorm_2x16:
       instr = pco_unpck(&tctx->b,
                         dest,
@@ -2182,6 +2391,15 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                         .scale = true);
       break;
 
+   case nir_op_pack_unorm_16:
+      instr = pco_pck(&tctx->b,
+                      dest,
+                      src[0],
+                      .rpt = 1,
+                      .pck_fmt = PCO_PCK_FMT_U1616,
+                      .scale = true);
+      break;
+
    case nir_op_pack_unorm_2x16:
       instr = pco_pck(&tctx->b,
                       dest,
@@ -2191,6 +2409,15 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                       .scale = true);
       break;
 
+   case nir_op_unpack_unorm_16:
+      instr = pco_unpck(&tctx->b,
+                        dest,
+                        pco_ref_elem(src[0], 0),
+                        .rpt = 1,
+                        .pck_fmt = PCO_PCK_FMT_U1616,
+                        .scale = true);
+      break;
+
    case nir_op_unpack_unorm_2x16:
       instr = pco_unpck(&tctx->b,
                         dest,
diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c
index 46fbfa0163c..0be301126bb 100644
--- a/src/imagination/vulkan/pvr_pipeline.c
+++ b/src/imagination/vulkan/pvr_pipeline.c
@@ -1247,8 +1247,6 @@ static void pvr_graphics_pipeline_setup_vertex_dma(
    const VkVertexInputBindingDescription
       *sorted_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 };
-   const VkVertexInputAttributeDescription
-      *sorted_attributes[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 };
 
    /* Vertex attributes map to the `layout(location = x)` annotation in the
    * shader where `x` is the attribute's location.
@@ -1270,18 +1268,9 @@
    for (uint32_t i = 0; i < vertex_input_state->vertexAttributeDescriptionCount;
        i++) {
-      const VkVertexInputAttributeDescription *attribute_desc =
+      const VkVertexInputAttributeDescription *attribute =
         &vertex_input_state->pVertexAttributeDescriptions[i];
 
-      sorted_attributes[attribute_desc->location] = attribute_desc;
-   }
-
-   for (uint32_t i = 0; i < vertex_input_state->vertexAttributeDescriptionCount;
-        i++) {
-      const VkVertexInputAttributeDescription *attribute = sorted_attributes[i];
-      if (!attribute)
-         continue;
-
       gl_vert_attrib location = attribute->location + VERT_ATTRIB_GENERIC0;
 
       const VkVertexInputBindingDescription *binding =
         sorted_bindings[attribute->binding];
@@ -1629,7 +1618,6 @@ static void pvr_init_vs_attribs(
 
 static void pvr_alloc_vs_attribs(pco_data *data, nir_shader *nir)
 {
-   /* TODO NEXT: this should be based on the format size. */
    nir_foreach_shader_in_variable (var, nir) {
       allocate_var(data->vs.attribs, &data->common.vtxins, var, 1);
    }
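
Note on the nir_opcodes.py helpers above: each pack_*/unpack_* call expands by
plain f-string substitution into an ordinary unop_horiz() opcode definition,
with each component's shift being the sum of the component widths below it.
For example, pack_rgb("unorm", 8, 8, 8) is exactly equivalent to writing out:

unop_horiz("pack_unorm_8_8_8", 1, tuint32, 3, tfloat32, """
dst.x = (uint32_t) pack_unorm_1x8(src0.x);
dst.x |= ((uint32_t) pack_unorm_1x8(src0.y)) << 8;
dst.x |= ((uint32_t) pack_unorm_1x8(src0.z)) << 16;
""")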