pvr, pco: per frag/vertex input/output rework

Adds support for packing and unpacking r10g10b10a2 unorm and
r11g11b10 float formats, as well as partial 2x16 and 4x8 formats.

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2024-12-03 15:14:34 +00:00 committed by Marge Bot
parent 3af73ef199
commit db686e190a
6 changed files with 1272 additions and 118 deletions

View file

@ -170,6 +170,16 @@ unpack_snorm_1x16(uint16_t u)
return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}
/**
 * Evaluate component 3 of packUnorm3x10_1x2.
 *
 * The input is clamped to [0.0, 1.0], scaled by 3.0 (the largest value that
 * fits in 2 bits) and rounded with _mesa_roundevenf() before being narrowed
 * to an integer.
 */
static uint16_t
pack_unorm_1x2(float x)
{
   const float clamped = CLAMP(x, 0.0f, 1.0f);
   const float rounded = _mesa_roundevenf(clamped * 3.0f);

   return (uint16_t) (int) rounded;
}
/**
* Evaluate one component packUnorm4x8.
*/
@ -189,6 +199,16 @@ pack_unorm_1x8(float x)
_mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}
/**
 * Evaluate component [0,2] of packUnorm3x10_1x2.
 *
 * The input is clamped to [0.0, 1.0], scaled by 1023.0 (the largest value
 * that fits in 10 bits) and rounded with _mesa_roundevenf() before being
 * narrowed to an integer.
 */
static uint16_t
pack_unorm_1x10(float x)
{
   const float clamped = CLAMP(x, 0.0f, 1.0f);
   const float rounded = _mesa_roundevenf(clamped * 1023.0f);

   return (uint16_t) (int) rounded;
}
/**
* Evaluate one component packUnorm2x16.
*/
@ -208,6 +228,16 @@ pack_unorm_1x16(float x)
_mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}
/**
 * Evaluate component 3 of unpackUnorm3x10_1x2.
 *
 * Only the two least significant bits of the input are considered; any
 * higher bits are discarded before the value is normalized by 3.0.
 */
static float
unpack_unorm_1x2(uint16_t u)
{
   const uint16_t bits = (uint16_t) (u & 0x0003);

   return (float) bits / 3.0f;
}
/**
* Evaluate one component of unpackUnorm4x8.
*/
@ -226,6 +256,16 @@ unpack_unorm_1x8(uint8_t u)
return (float) u / 255.0f;
}
/**
 * Evaluate component [0,2] of unpackUnorm3x10_1x2.
 *
 * Only the ten least significant bits of the input are considered; any
 * higher bits are discarded before the value is normalized by 1023.0.
 */
static float
unpack_unorm_1x10(uint16_t u)
{
   const uint16_t bits = (uint16_t) (u & 0x03FF);

   return (float) bits / 1023.0f;
}
/**
* Evaluate one component of unpackUnorm2x16.
*/

View file

@ -278,6 +278,14 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
*/
return NULL;
case nir_op_unpack_snorm_8_8:
case nir_op_unpack_unorm_8_8:
case nir_op_unpack_snorm_8_8_8:
case nir_op_unpack_unorm_8_8_8:
case nir_op_unpack_unorm_10_10_10_2:
case nir_op_unpack_float_11_11_10:
return NULL;
case nir_op_unpack_half_2x16: {
if (!b->shader->options->lower_unpack_half_2x16)
return NULL;

View file

@ -1808,3 +1808,100 @@ unop_numeric_convert("f2e4m3fn_satfn", tuint8, tfloat32, "isinf(src0) ? 0x7f : _
unop_numeric_convert("e5m22f", tfloat32, tuint8, "_mesa_e5m2_to_float(src0)")
unop_numeric_convert("f2e5m2", tuint8, tfloat32, "_mesa_float_to_e5m2(src0)")
unop_numeric_convert("f2e5m2_sat", tuint8, tfloat32, "_mesa_float_to_e5m2_sat(src0)")
# Declare the single-component opcode "pack_{fmt}_{r}" through unop_horiz.
# The emitted C snippet stores pack_{fmt}_1x{r}(src0.x) into dst.x.
# `fmt` selects the helper family (called below with "snorm", "unorm" and
# "half"); `r` is the bit width used in the helper's name.
# NOTE(review): the positional arguments (1, tuint32, 1, tfloat32) are
# presumably output size/type and input size/type - confirm against
# unop_horiz's signature.
def pack_r(fmt, r):
unop_horiz(f"pack_{fmt}_{r}", 1, tuint32, 1, tfloat32, f"""
dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
""")
# Declare the two-component opcode "pack_{fmt}_{r}_{g}" through unop_horiz.
# The emitted C snippet packs src0.x with pack_{fmt}_1x{r} into the low bits
# of dst.x, then ORs in pack_{fmt}_1x{g}(src0.y) shifted left by `r` bits.
# `r` and `g` are the per-component bit widths.
def pack_rg(fmt, r, g):
unop_horiz(f"pack_{fmt}_{r}_{g}", 1, tuint32, 2, tfloat32, f"""
dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r};
""")
# Declare the three-component opcode "pack_{fmt}_{r}_{g}_{b}" through
# unop_horiz. Components are packed from the least significant bit upwards:
# src0.x at bit 0, src0.y at bit `r`, src0.z at bit `r + g` (the shift
# amounts are computed here in Python and baked into the C snippet).
def pack_rgb(fmt, r, g, b):
unop_horiz(f"pack_{fmt}_{r}_{g}_{b}", 1, tuint32, 3, tfloat32, f"""
dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r};
dst.x |= ((uint32_t) pack_{fmt}_1x{b}(src0.z)) << {r + g};
""")
# Declare the four-component opcode "pack_{fmt}_{r}_{g}_{b}_{a}" through
# unop_horiz. Components are packed from the least significant bit upwards:
# src0.x at bit 0, src0.y at bit `r`, src0.z at bit `r + g` and src0.w at
# bit `r + g + b` (the shift amounts are computed here in Python and baked
# into the C snippet).
def pack_rgba(fmt, r, g, b, a):
unop_horiz(f"pack_{fmt}_{r}_{g}_{b}_{a}", 1, tuint32, 4, tfloat32, f"""
dst.x = (uint32_t) pack_{fmt}_1x{r}(src0.x);
dst.x |= ((uint32_t) pack_{fmt}_1x{g}(src0.y)) << {r};
dst.x |= ((uint32_t) pack_{fmt}_1x{b}(src0.z)) << {r + g};
dst.x |= ((uint32_t) pack_{fmt}_1x{a}(src0.w)) << {r + g + b};
""")
# Partial 8-bit-per-component variants: 1, 2 and 3 components, signed and
# unsigned normalized.
pack_r("snorm", 8)
pack_r("unorm", 8)
pack_rg("snorm", 8, 8)
pack_rg("unorm", 8, 8)
pack_rgb("snorm", 8, 8, 8)
pack_rgb("unorm", 8, 8, 8)
# Single 16-bit component variants.
pack_r("snorm", 16)
pack_r("unorm", 16)
pack_r("half", 16)
# 10:10:10:2 unsigned normalized; relies on the pack_unorm_1x10 and
# pack_unorm_1x2 helpers.
pack_rgba("unorm", 10, 10, 10, 2)
# 11:11:10 float is spelled out by hand because it converts with
# f32_to_uf11 / f32_to_uf10 rather than a pack_<fmt>_1x<bits> helper.
# Layout: x in bits [0, 11), y in bits [11, 22), z in bits [22, 32).
unop_horiz(f"pack_float_11_11_10", 1, tuint32, 3, tfloat32, f"""
dst.x = f32_to_uf11(src0.x) & 0x7ff;
dst.x |= (f32_to_uf11(src0.y) & 0x7ff) << 11;
dst.x |= (f32_to_uf10(src0.z) & 0x3ff) << 22;
""")
# Declare the single-component opcode "unpack_{fmt}_{r}" through unop_horiz.
# The emitted C snippet masks src0.x down to its low `r` bits and passes the
# result to unpack_{fmt}_1x{r}, storing the float in dst.x.
# NOTE(review): the positional arguments (1, tfloat32, 1, tuint32) are
# presumably output size/type and input size/type - confirm against
# unop_horiz's signature.
def unpack_r(fmt, r):
unop_horiz(f"unpack_{fmt}_{r}", 1, tfloat32, 1, tuint32, f"""
dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
""")
# Declare the two-component opcode "unpack_{fmt}_{r}_{g}" through unop_horiz.
# dst.x comes from the low `r` bits of src0.x; dst.y from the next `g` bits
# (src0.x shifted right by `r`, then masked to `g` bits).
def unpack_rg(fmt, r, g):
unop_horiz(f"unpack_{fmt}_{r}_{g}", 2, tfloat32, 1, tuint32, f"""
dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1));
""")
# Declare the three-component opcode "unpack_{fmt}_{r}_{g}_{b}" through
# unop_horiz. Fields are extracted from the least significant bit upwards:
# dst.x from bit 0, dst.y from bit `r`, dst.z from bit `r + g`; each field is
# masked to its own width before being handed to the per-component helper.
def unpack_rgb(fmt, r, g, b):
unop_horiz(f"unpack_{fmt}_{r}_{g}_{b}", 3, tfloat32, 1, tuint32, f"""
dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1));
dst.z = unpack_{fmt}_1x{b}((src0.x >> {r + g}) & ((1u << {b}) - 1));
""")
# Declare the four-component opcode "unpack_{fmt}_{r}_{g}_{b}_{a}" through
# unop_horiz. Fields are extracted from the least significant bit upwards:
# dst.x from bit 0, dst.y from bit `r`, dst.z from bit `r + g` and dst.w from
# bit `r + g + b`; each field is masked to its own width before being handed
# to the per-component helper.
def unpack_rgba(fmt, r, g, b, a):
unop_horiz(f"unpack_{fmt}_{r}_{g}_{b}_{a}", 4, tfloat32, 1, tuint32, f"""
dst.x = unpack_{fmt}_1x{r}(src0.x & ((1u << {r}) - 1));
dst.y = unpack_{fmt}_1x{g}((src0.x >> {r}) & ((1u << {g}) - 1));
dst.z = unpack_{fmt}_1x{b}((src0.x >> {r + g}) & ((1u << {b}) - 1));
dst.w = unpack_{fmt}_1x{a}((src0.x >> {r + g + b}) & ((1u << {a}) - 1));
""")
# Partial 8-bit-per-component variants: 1, 2 and 3 components, signed and
# unsigned normalized.
unpack_r("snorm", 8)
unpack_r("unorm", 8)
unpack_rg("snorm", 8, 8)
unpack_rg("unorm", 8, 8)
unpack_rgb("snorm", 8, 8, 8)
unpack_rgb("unorm", 8, 8, 8)
# Single 16-bit component variants.
unpack_r("snorm", 16)
unpack_r("unorm", 16)
# unpack_half_16 is spelled out by hand instead of going through unpack_r
# because unpack_half_1x16 is called with a second argument (the 16-bit
# denorm flush-to-zero setting for the current execution mode).
unop_horiz(f"unpack_half_16", 1, tfloat32, 1, tuint32, """
dst.x = unpack_half_1x16(src0.x & ((1u << 16) - 1), nir_is_denorm_flush_to_zero(execution_mode, 16));
""")
# 10:10:10:2 unsigned normalized; relies on the unpack_unorm_1x10 and
# unpack_unorm_1x2 helpers.
unpack_rgba("unorm", 10, 10, 10, 2)
# 11:11:10 float is spelled out by hand because it converts with
# uf11_to_f32 / uf10_to_f32 rather than an unpack_<fmt>_1x<bits> helper.
# Layout: x in bits [0, 11), y in bits [11, 22), z in bits [22, 32).
unop_horiz(f"unpack_float_11_11_10", 3, tfloat32, 1, tuint32, f"""
dst.x = uf11_to_f32(src0.x & 0x7ff);
dst.y = uf11_to_f32((src0.x >> 11) & 0x7ff);
dst.z = uf10_to_f32((src0.x >> 22) & 0x3ff);
""")

File diff suppressed because it is too large Load diff

View file

@ -189,9 +189,6 @@ trans_load_input_vs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
ASSERTED unsigned base = nir_intrinsic_base(intr);
assert(!base);
ASSERTED nir_alu_type type = nir_intrinsic_dest_type(intr);
assert(type == nir_type_float32 || type == nir_type_uint32 ||
type == nir_type_int32);
/* TODO: f16 support. */
ASSERTED const nir_src offset = intr->src[0];
@ -470,6 +467,37 @@ static unsigned fetch_resource_base_reg_packed(const pco_common_data *common,
return fetch_resource_base_reg(common, desc_set, binding, elem, is_img_smp);
}
/**
* \brief Translates a NIR fs load_output intrinsic into PCO.
*
* Reads one component of a fragment shader output back from its pixel output
* hardware register and moves it into the destination.
*
* \param[in,out] tctx Translation context.
* \param[in] intr load_output intrinsic.
* \param[in] dest Destination reference that receives the loaded value.
* \return The translated PCO instruction.
*/
static pco_instr *
trans_load_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
{
/* Only a base of zero is handled. */
ASSERTED unsigned base = nir_intrinsic_base(intr);
assert(!base);
unsigned component = nir_intrinsic_component(intr);
/* Only a constant offset of zero is handled. */
ASSERTED const nir_src offset = intr->src[0];
assert(nir_src_as_uint(offset) == 0);
/* NOTE(review): this is a fragment shader *output* location, yet it is
 * typed as gl_varying_slot; presumably gl_frag_result is the intended
 * enum - confirm against how fs.outputs is indexed elsewhere.
 */
gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
const pco_range *range = &tctx->shader->data.fs.outputs[location];
assert(component < range->count);
/* The output is required to be backed by an output register for now. */
ASSERTED bool output_reg = tctx->shader->data.fs.output_reg[location];
assert(output_reg);
/* TODO: tile buffer support. */
/* Source is the requested component within the output's register range,
 * addressed in the PIXOUT register class.
 */
pco_ref src = pco_ref_hwreg(range->start + component, PCO_REG_CLASS_PIXOUT);
return pco_mov(&tctx->b, dest, src, .olchk = true);
}
static pco_instr *trans_load_common_store(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
@ -1133,6 +1161,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
UNREACHABLE("Unsupported stage for \"nir_intrinsic_store_output\".");
break;
case nir_intrinsic_load_output:
assert(tctx->stage == MESA_SHADER_FRAGMENT);
instr = trans_load_output_fs(tctx, intr, dest);
break;
case nir_intrinsic_load_push_constant:
instr =
trans_load_common_store(tctx,
@ -2112,6 +2145,14 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
instr = trans_min_max(tctx, alu->op, dest, src[0], src[1]);
break;
case nir_op_pack_half_16:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_F16F16);
break;
case nir_op_pack_half_2x16:
instr = pco_pck(&tctx->b,
dest,
@ -2120,6 +2161,14 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.pck_fmt = PCO_PCK_FMT_F16F16);
break;
case nir_op_unpack_half_16:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_F16F16);
break;
case nir_op_unpack_half_2x16:
instr = pco_unpck(&tctx->b,
dest,
@ -2128,6 +2177,33 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.pck_fmt = PCO_PCK_FMT_F16F16);
break;
case nir_op_pack_snorm_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_pack_snorm_8_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 2,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_pack_snorm_8_8_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_pack_snorm_4x8:
instr = pco_pck(&tctx->b,
dest,
@ -2137,6 +2213,33 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_unpack_snorm_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_unpack_snorm_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 2,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_unpack_snorm_8_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_S8888,
.scale = true);
break;
case nir_op_unpack_snorm_4x8:
instr = pco_unpck(&tctx->b,
dest,
@ -2146,6 +2249,33 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_pack_unorm_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_pack_unorm_8_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 2,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_pack_unorm_8_8_8:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_pack_unorm_4x8:
instr = pco_pck(&tctx->b,
dest,
@ -2155,6 +2285,33 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_unpack_unorm_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_unpack_unorm_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 2,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_unpack_unorm_8_8_8:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_U8888,
.scale = true);
break;
case nir_op_unpack_unorm_4x8:
instr = pco_unpck(&tctx->b,
dest,
@ -2164,6 +2321,49 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_pack_unorm_10_10_10_2:
   /* Pack four scaled (normalized) components into the 10:10:10:2 unsigned
    * layout. The format enumerator uses the PCO_PCK_FMT_ prefix, matching
    * the unpack counterpart and every other pack/unpack case in this
    * switch.
    */
   instr = pco_pck(&tctx->b,
                   dest,
                   src[0],
                   .rpt = 4,
                   .pck_fmt = PCO_PCK_FMT_U1010102,
                   .scale = true);
   break;
case nir_op_unpack_unorm_10_10_10_2:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 4,
.pck_fmt = PCO_PCK_FMT_U1010102,
.scale = true);
break;
case nir_op_pack_float_11_11_10:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_F111110);
break;
case nir_op_unpack_float_11_11_10:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 3,
.pck_fmt = PCO_PCK_FMT_F111110);
break;
case nir_op_pack_snorm_16:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S1616,
.scale = true);
break;
case nir_op_pack_snorm_2x16:
instr = pco_pck(&tctx->b,
dest,
@ -2173,6 +2373,15 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_unpack_snorm_16:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_S1616,
.scale = true);
break;
case nir_op_unpack_snorm_2x16:
instr = pco_unpck(&tctx->b,
dest,
@ -2182,6 +2391,15 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_pack_unorm_16:
instr = pco_pck(&tctx->b,
dest,
src[0],
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_U1616,
.scale = true);
break;
case nir_op_pack_unorm_2x16:
instr = pco_pck(&tctx->b,
dest,
@ -2191,6 +2409,15 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
.scale = true);
break;
case nir_op_unpack_unorm_16:
instr = pco_unpck(&tctx->b,
dest,
pco_ref_elem(src[0], 0),
.rpt = 1,
.pck_fmt = PCO_PCK_FMT_U1616,
.scale = true);
break;
case nir_op_unpack_unorm_2x16:
instr = pco_unpck(&tctx->b,
dest,

View file

@ -1247,8 +1247,6 @@ static void pvr_graphics_pipeline_setup_vertex_dma(
const VkVertexInputBindingDescription
*sorted_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 };
const VkVertexInputAttributeDescription
*sorted_attributes[PVR_MAX_VERTEX_INPUT_BINDINGS] = { 0 };
/* Vertex attributes map to the `layout(location = x)` annotation in the
* shader where `x` is the attribute's location.
@ -1270,18 +1268,9 @@ static void pvr_graphics_pipeline_setup_vertex_dma(
for (uint32_t i = 0; i < vertex_input_state->vertexAttributeDescriptionCount;
i++) {
const VkVertexInputAttributeDescription *attribute_desc =
const VkVertexInputAttributeDescription *attribute =
&vertex_input_state->pVertexAttributeDescriptions[i];
sorted_attributes[attribute_desc->location] = attribute_desc;
}
for (uint32_t i = 0; i < vertex_input_state->vertexAttributeDescriptionCount;
i++) {
const VkVertexInputAttributeDescription *attribute = sorted_attributes[i];
if (!attribute)
continue;
gl_vert_attrib location = attribute->location + VERT_ATTRIB_GENERIC0;
const VkVertexInputBindingDescription *binding =
sorted_bindings[attribute->binding];
@ -1629,7 +1618,6 @@ static void pvr_init_vs_attribs(
static void pvr_alloc_vs_attribs(pco_data *data, nir_shader *nir)
{
/* TODO NEXT: this should be based on the format size. */
nir_foreach_shader_in_variable (var, nir) {
allocate_var(data->vs.attribs, &data->common.vtxins, var, 1);
}