diff --git a/docs/envvars.rst b/docs/envvars.rst
index d3ad9b5985c..dbaaa84b9dc 100644
--- a/docs/envvars.rst
+++ b/docs/envvars.rst
@@ -2139,6 +2139,9 @@ PowerVR driver environment variables
``no_pred_cf``
No predicated execution in CF.
+ ``alloc_extra_vtxins``
+ Makes additional vertex input registers available to register allocation (non-fragment, non-internal shaders only).
+
.. envvar:: PCO_SKIP_PASSES
A comma-separated list of passes to skip.
diff --git a/src/imagination/.clang-format b/src/imagination/.clang-format
index 30ed663d997..c918aaaef41 100644
--- a/src/imagination/.clang-format
+++ b/src/imagination/.clang-format
@@ -294,6 +294,7 @@ ForEachMacros: [
'pco_foreach_instr_src_ssa_from',
'pco_foreach_instr_src_vreg',
'pco_foreach_instr_src_vreg_ssa',
+ 'pco_foreach_instr_src_vtxin_reg',
'pco_foreach_loop_in_func',
'pco_foreach_loop_in_func_from',
'pco_foreach_loop_in_func_from_rev',
diff --git a/src/imagination/include/hwdef/rogue_hw_utils.h b/src/imagination/include/hwdef/rogue_hw_utils.h
index 6324cd56c42..a859059e9b6 100644
--- a/src/imagination/include/hwdef/rogue_hw_utils.h
+++ b/src/imagination/include/hwdef/rogue_hw_utils.h
@@ -478,6 +478,12 @@ rogue_max_wg_temps(const struct pvr_device_info *dev_info,
return temps;
}
+static inline uint32_t rogue_get_vtxins(void)
+{
+ /* Fixed count for now. TODO: use highest safe number of vertex input registers. */
+ return 12;
+}
+
static inline uint32_t
rogue_num_uscs_per_tile(const struct pvr_device_info *dev_info)
{
diff --git a/src/imagination/pco/pco.c b/src/imagination/pco/pco.c
index 42c76bd9db3..4274442b362 100644
--- a/src/imagination/pco/pco.c
+++ b/src/imagination/pco/pco.c
@@ -370,6 +370,7 @@ struct pvr_stats pco_get_pvr_stats(pco_shader *shader)
.scratch_size = shader->data.common.scratch,
.spill_count = shader->data.common.spilled_temps,
.temp_count = shader->data.common.temps,
+ .vtxin_count = shader->data.common.vtxins,
.loop_count = loop_count,
.inst_group_count = igrp_count,
.main_inst_group_count = main_count,
diff --git a/src/imagination/pco/pco_debug.c b/src/imagination/pco/pco_debug.c
index 54f5c254e35..c6d48c56cfe 100644
--- a/src/imagination/pco/pco_debug.c
+++ b/src/imagination/pco/pco_debug.c
@@ -26,6 +26,9 @@ static const struct debug_named_value pco_debug_options[] = {
{ "val_skip", PCO_DEBUG_VAL_SKIP, "Skip IR validation." },
{ "reindex", PCO_DEBUG_REINDEX, "Reindex IR at the end of each pass." },
{ "no_pred_cf", PCO_DEBUG_NO_PRED_CF, "No predicated execution in CF." },
+ { "alloc_extra_vtxins",
+ PCO_DEBUG_ALLOC_EXTRA_VTXINS,
+ "Allocates additional vertex input registers." },
DEBUG_NAMED_VALUE_END,
};
diff --git a/src/imagination/pco/pco_internal.h b/src/imagination/pco/pco_internal.h
index fb900c3bb14..49987c4f573 100644
--- a/src/imagination/pco/pco_internal.h
+++ b/src/imagination/pco/pco_internal.h
@@ -57,6 +57,7 @@ enum pco_debug {
PCO_DEBUG_VAL_SKIP = BITFIELD64_BIT(0),
PCO_DEBUG_REINDEX = BITFIELD64_BIT(1),
PCO_DEBUG_NO_PRED_CF = BITFIELD64_BIT(2),
+ PCO_DEBUG_ALLOC_EXTRA_VTXINS = BITFIELD64_BIT(3),
};
extern uint64_t pco_debug;
@@ -353,6 +354,7 @@ typedef struct _pco_func {
unsigned next_loop; /** Next loop index. */
unsigned temps; /** Number of temps allocated. */
+ unsigned vtxins; /** Number of vertex input registers used. */
pco_ref emc; /** Execution mask counter register. */
@@ -731,6 +733,10 @@ PCO_DEFINE_CAST(pco_cf_node_as_func,
pco_foreach_instr_src (psrc, instr) \
if (pco_ref_is_hwreg(*psrc))
+#define pco_foreach_instr_src_vtxin_reg(psrc, instr) \
+ pco_foreach_instr_src (psrc, instr) \
+ if (pco_ref_is_vtxin(*psrc)) /* Loop body runs only for vtxin register sources. */
+
#define pco_cf_node_head(list) list_first_entry(list, pco_cf_node, link)
#define pco_cf_node_tail(list) list_last_entry(list, pco_cf_node, link)
@@ -1985,6 +1991,17 @@ static inline bool pco_ref_is_scalar(pco_ref ref)
return !ref.chans;
}
+/**
+ * \brief Return whether a reference is a register of the vertex input class.
+ *
+ * \param[in] ref PCO reference.
+ * \return True if ref has type PCO_REF_TYPE_REG and class PCO_REG_CLASS_VTXIN.
+ */
+static inline bool pco_ref_is_vtxin(pco_ref ref)
+{
+ return ref.type == PCO_REF_TYPE_REG && ref.reg_class == PCO_REG_CLASS_VTXIN;
+}
+
/* PCO ref getters. */
/**
* \brief Returns the pointee component of an indexed register reference.
diff --git a/src/imagination/pco/pco_print.c b/src/imagination/pco/pco_print.c
index 17ad1996f88..bd50c82bf0c 100644
--- a/src/imagination/pco/pco_print.c
+++ b/src/imagination/pco/pco_print.c
@@ -1187,8 +1187,10 @@ static void pco_print_func(pco_print_state *state, pco_func *func)
{
pco_printfi(state, "func");
pco_print_func_sig(state, func, false);
- if (state->is_grouped)
+ if (state->is_grouped) {
pco_printf(state, " /* temps: %u */", func->temps);
+ pco_printf(state, " /* vtxins: %u */", func->vtxins);
+ }
pco_printf(state, "\n");
pco_printfi(state, "{\n");
diff --git a/src/imagination/pco/pco_ra.c b/src/imagination/pco/pco_ra.c
index cca5e8f20ba..9173573dae3 100644
--- a/src/imagination/pco/pco_ra.c
+++ b/src/imagination/pco/pco_ra.c
@@ -461,9 +461,10 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
preproc_vecs(func);
+ unsigned num_rsvd_vtxins = func->parent_shader->data.common.vtxins;
unsigned num_ssas = func->next_ssa;
unsigned num_vregs = func->next_vreg;
- unsigned num_vars = num_ssas + num_vregs;
+ unsigned num_vars = num_ssas + num_vregs + num_rsvd_vtxins;
/* Collect used bit sizes. */
uint8_t used_bits = 0;
@@ -492,7 +493,9 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
assert(only_32bit);
struct ra_regs *ra_regs =
- ra_alloc_reg_set(func, ctx->allocable_temps, !only_32bit);
+ ra_alloc_reg_set(func,
+ ctx->allocable_temps + ctx->allocable_vtxins,
+ !only_32bit);
BITSET_WORD *comps =
rzalloc_array_size(ra_regs, sizeof(*comps), BITSET_WORDS(num_ssas));
@@ -609,6 +612,11 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
for (unsigned t = 0; t < ctx->allocable_temps - (stride - 1); ++t)
ra_class_add_reg(ra_class, t);
+
+ for (unsigned t = ctx->allocable_temps;
+ t < ctx->allocable_temps + ctx->allocable_vtxins - (stride - 1);
+ ++t)
+ ra_class_add_reg(ra_class, t);
}
ra_set_finalize(ra_regs, NULL);
@@ -686,6 +694,26 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
live_ranges[src.val].end =
MAX2(live_ranges[src.val].end, instr->index);
}
+
+ /* Ensure that vertex input registers with pre-initialised data are not
+ * clobbered too early.
+ */
+ if (ctx->allocable_vtxins > 0) {
+ pco_foreach_instr_src_vtxin_reg (psrc, instr) {
+ pco_ref src = *psrc;
+
+ /* Place vtxin regs after ssa vars and vregs. */
+ src.val += num_ssas + num_vregs;
+
+ live_ranges[src.val].end =
+ MAX2(live_ranges[src.val].end, instr->index);
+ live_ranges[src.val].start = 0;
+
+ ra_set_node_reg(ra_graph,
+ src.val,
+ psrc->val + ctx->allocable_temps);
+ }
+ }
}
/* Extend lifetimes of non-overriden vecs that have comp instructions. */
@@ -898,16 +926,31 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
}
for (unsigned u = 0; u < chans; ++u) {
- pco_ref dest =
- pco_ref_hwreg(temp_dest_base + offset, PCO_REG_CLASS_TEMP);
+ pco_ref dest;
+ if (temp_dest_base + offset >= ctx->allocable_temps) {
+ dest = pco_ref_hwreg(temp_dest_base + offset -
+ ctx->allocable_temps,
+ PCO_REG_CLASS_VTXIN);
+ } else {
+ dest = pco_ref_hwreg(temp_dest_base + offset,
+ PCO_REG_CLASS_TEMP);
+ }
+
dest = pco_ref_offset(dest, u);
dest = pco_ref_offset(dest, ctx->temp_alloc_offset);
pco_ref src;
- if (pco_ref_is_ssa(*psrc) || pco_ref_is_vreg(*psrc))
- src = pco_ref_hwreg(temp_src_base, PCO_REG_CLASS_TEMP);
- else
+ if (pco_ref_is_ssa(*psrc) || pco_ref_is_vreg(*psrc)) {
+ if (temp_src_base >= ctx->allocable_temps) {
+ src =
+ pco_ref_hwreg(temp_src_base - ctx->allocable_temps,
+ PCO_REG_CLASS_VTXIN);
+ } else {
+ src = pco_ref_hwreg(temp_src_base, PCO_REG_CLASS_TEMP);
+ }
+ } else {
src = pco_ref_chans(*psrc, 1);
+ }
src = pco_ref_offset(src, u);
src = pco_ref_offset(src, ctx->temp_alloc_offset);
@@ -981,7 +1024,15 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
pdest->type = PCO_REF_TYPE_REG;
pdest->reg_class = PCO_REG_CLASS_TEMP;
pdest->val = val + ctx->temp_alloc_offset;
- temps = MAX2(temps, dest_temps + ctx->temp_alloc_offset);
+
+ /* Got a vertex input register. */
+ if (val >= ctx->allocable_temps) {
+ pdest->reg_class = PCO_REG_CLASS_VTXIN;
+ pdest->val = val - ctx->allocable_temps;
+ vtxins = MAX2(vtxins, dest_temps - ctx->allocable_temps);
+ } else {
+ temps = MAX2(temps, dest_temps + ctx->temp_alloc_offset);
+ }
}
pco_foreach_instr_src_ssa (psrc, instr) {
@@ -996,6 +1047,12 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
psrc->type = PCO_REF_TYPE_REG;
psrc->reg_class = PCO_REG_CLASS_TEMP;
psrc->val = val + ctx->temp_alloc_offset;
+
+ /* Got a vertex input register. */
+ if (val >= ctx->allocable_temps) {
+ psrc->reg_class = PCO_REG_CLASS_VTXIN;
+ psrc->val = val - ctx->allocable_temps;
+ }
}
pco_foreach_instr_dest_vreg (pdest, instr) {
@@ -1005,7 +1062,15 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
pdest->type = PCO_REF_TYPE_REG;
pdest->reg_class = PCO_REG_CLASS_TEMP;
pdest->val = val + ctx->temp_alloc_offset;
- temps = MAX2(temps, dest_temps);
+
+ /* Got a vertex input register. */
+ if (val >= ctx->allocable_temps) {
+ pdest->reg_class = PCO_REG_CLASS_VTXIN;
+ pdest->val = val - ctx->allocable_temps;
+ vtxins = MAX2(vtxins, dest_temps - ctx->allocable_temps);
+ } else {
+ temps = MAX2(temps, dest_temps);
+ }
}
pco_foreach_instr_src_vreg (psrc, instr) {
@@ -1014,6 +1079,12 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
psrc->type = PCO_REF_TYPE_REG;
psrc->reg_class = PCO_REG_CLASS_TEMP;
psrc->val = val + ctx->temp_alloc_offset;
+
+ /* Got a vertex input register. */
+ if (val >= ctx->allocable_temps) {
+ psrc->reg_class = PCO_REG_CLASS_VTXIN;
+ psrc->val = val - ctx->allocable_temps;
+ }
}
/* Drop no-ops. */
@@ -1027,6 +1098,7 @@ static bool pco_ra_func(pco_func *func, pco_ra_ctx *ctx)
ralloc_free(ra_regs);
func->temps = temps;
+ func->vtxins = vtxins;
if (pco_should_print_shader(func->parent_shader) && PCO_DEBUG_PRINT(RA)) {
printf(
@@ -1060,12 +1132,27 @@ bool pco_ra(pco_shader *shader)
* unsigned opt_temps = rogue_get_optimal_temps(shader->ctx->dev_info);
*/
+ /* If any vertex input registers are already used, round up to the nearest
+ * multiple of 4 as vertex input registers are allocated in blocks of 4.
+ *
+ * This number is used by default as the maximum safe number of vertex input
+ * registers that can be used is not currently known.
+ */
+ unsigned hw_vtxins = ALIGN_POT(shader->data.common.vtxins,
+ ROGUE_USRM_GRANULARITY_IN_REGISTERS);
+
+ if (shader->stage != MESA_SHADER_FRAGMENT && !shader->is_internal) {
+ if (PCO_DEBUG(ALLOC_EXTRA_VTXINS)) {
+ hw_vtxins = MAX2(hw_vtxins, rogue_get_vtxins());
+ }
+ }
+
/* TODO: different number of temps available if preamble/phase change. */
/* TODO: different number of temps available if barriers are in use. */
- /* TODO: support for internal and vtxin registers. */
+ /* TODO: support for internal registers. */
pco_ra_ctx ctx = {
.allocable_temps = hw_temps,
- .allocable_vtxins = 0,
+ .allocable_vtxins = hw_vtxins,
.allocable_interns = 0,
};
@@ -1088,6 +1175,8 @@ bool pco_ra(pco_shader *shader)
progress |= pco_ra_func(func, &ctx);
shader->data.common.temps = MAX2(shader->data.common.temps, func->temps);
+ shader->data.common.vtxins =
+ MAX2(shader->data.common.vtxins, func->vtxins);
}
shader->data.common.spilled_temps = ctx.spilled_temps;
diff --git a/src/util/shader_stats.xml b/src/util/shader_stats.xml
index 137f2ffb335..340f72fbe42 100644
--- a/src/util/shader_stats.xml
+++ b/src/util/shader_stats.xml
@@ -200,6 +200,7 @@
Scratch size per instance in bytes
Number of spilled registers per instance
Number of allocated temp registers
+ Number of used vertex input registers
Number of not unrolled loops in the shader
Total number of instruction groups
Number of main instruction groups