diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index b97fc4e5fb1..1c840679a88 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -120,6 +120,15 @@ struct pan_compile_inputs { */ uint32_t fixed_varying_mask; + /* Settings to move constants into the FAU. */ + struct { + uint32_t *values; + /* In multiples of 32bit. */ + uint32_t max_amount; + /* In multiples of 32bit. */ + uint32_t offset; + } fau_consts; + union { struct { uint32_t rt_conv[8]; @@ -204,6 +213,9 @@ struct pan_shader_info { /* Bit mask of preloaded registers */ uint64_t preload; + uint32_t fau_consts_count; + uint32_t fau_consts[128]; + union { struct { bool reads_frag_coord; diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index e09bcec5526..437e9faa766 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -2567,8 +2567,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, struct cs_index draw_id = cs_scratch_reg32(b, 7); struct cs_index vs_fau_addr = cs_scratch_reg64(b, 8); struct cs_index tracing_scratch_regs = cs_scratch_reg_tuple(b, 10, 4); - uint32_t vs_fau_count = BITSET_COUNT(vs->fau.used_sysvals) + - BITSET_COUNT(vs->fau.used_push_consts); + uint32_t vs_fau_count = vs->fau.total_count; if (draw->indirect.count_buffer_dev_addr) { cs_move32_to(b, max_draw_count, draw->indirect.draw_count); diff --git a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c index 1a1196c62d8..7888e1f4850 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c @@ -70,6 +70,11 @@ panvk_per_arch(cmd_prepare_push_uniforms)( BITSET_FOREACH_SET(w, shader->fau.used_push_consts, MAX_PUSH_CONST_FAUS) faus[fau++] = push_consts[w]; + + for (uint32_t i = 0; i < shader->info.fau_consts_count; i += 2) { + faus[fau++] = (uint64_t)shader->info.fau_consts[i + 1] << 32 | + shader->info.fau_consts[i]; + } } *push_ptr = push_uniforms.gpu; diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index d3a478bf5da..530dceae428 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -1,4 +1,5 @@ /* + * Copyright © 2025 Arm Ltd. * Copyright © 2021 Collabora Ltd. * * Derived from tu_shader.c which is: @@ -62,6 +63,8 @@ #include "vk_shader.h" #include "vk_util.h" +#define FAU_WORD_COUNT 64 + struct panvk_lower_sysvals_context { struct panvk_shader_variant *shader; const struct vk_graphics_pipeline_state *state; @@ -710,10 +713,11 @@ lower_load_push_consts(nir_shader *nir, struct panvk_shader_variant *shader) * needed in the blend shader. */ shader->fau.sysval_count = BITSET_COUNT(shader->fau.used_sysvals); /* 32 FAUs (256 bytes) are reserved for API push constants */ - assert(shader->fau.sysval_count <= 64 - 32 && "too many sysval FAUs"); + assert(shader->fau.sysval_count <= FAU_WORD_COUNT - 32 && + "too many sysval FAUs"); shader->fau.total_count = shader->fau.sysval_count + BITSET_COUNT(shader->fau.used_push_consts); - assert(shader->fau.total_count <= 64 && + assert(shader->fau.total_count <= FAU_WORD_COUNT && "asking for more FAUs than the hardware has to offer"); if (!progress) @@ -977,6 +981,11 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir, util_dynarray_init(&binary, NULL); pan_shader_compile(nir, compile_input, &binary, &shader->info); + /* Propagate potential additional FAU values into the panvk info struct. */ + /* FAU consts are pushed as 32bit values, but total_count is for 64bit + * ones. */ + shader->fau.total_count += DIV_ROUND_UP(shader->info.fau_consts_count, 2); + void *bin_ptr = util_dynarray_element(&binary, uint8_t, 0); unsigned bin_size = util_dynarray_num_elements(&binary, uint8_t); @@ -1355,6 +1364,13 @@ panvk_compile_shader(struct panvk_device *dev, info->robustness, noperspective_varyings, state, &inputs, variant); + /* Allow the remaining FAU space to be filled with constants. */ + input_variants[v].fau_consts.max_amount = + 2 * (FAU_WORD_COUNT - variant->fau.total_count); + input_variants[v].fau_consts.offset = variant->fau.total_count * 2; + input_variants[v].fau_consts.values = &variant->info.fau_consts[0]; + assert(input_variants[v].fau_consts.max_amount <= ARRAY_SIZE(variant->info.fau_consts)); + variant->own_bin = true; result = panvk_compile_nir(dev, nir_variants[v], info->flags, @@ -1391,6 +1407,12 @@ panvk_compile_shader(struct panvk_device *dev, inputs.valhall.use_ld_var_buf = panvk_use_ld_var_buf(variant); #endif + /* Allow the remaining FAU space to be filled with constants. */ + inputs.fau_consts.max_amount = 2 * (FAU_WORD_COUNT - variant->fau.total_count); + inputs.fau_consts.offset = variant->fau.total_count * 2; + inputs.fau_consts.values = &variant->info.fau_consts[0]; + assert(inputs.fau_consts.max_amount <= ARRAY_SIZE(variant->info.fau_consts)); + result = panvk_compile_nir(dev, nir, info->flags, &inputs, variant); /* We need to update info.push.count because it's used to initialize the