panvk: Add a panvk_common_sysvals struct

For geometry shaders, we're going to need to compile various graphics
shaders down to compute shaders.  This means that they'll look like
compute shaders to much of the compile pipeline but ultimately get
executed as graphics shaders.  Most of the time, the compiler will just
happily take whatever offset you give and try to load the sysval from
there so you can load a graphics sysval from a compute shader just fine.
However, for the common ones, we switch on the shader stage and load
from a different offset for 3D vs. compute.  This breaks the moment you
have a compute shader that's going to actually load from a 3D sysval
space.

The solution here is to ensure that any common sysvals (currently just
the push uniforms address and the printf buffer) are at exactly the same
offset in both.  This is done by adding a panvk_common_sysvals struct,
some static asserts, and a bit of macro magic to keep things ergonomic.
This also changes push uniform upload to just swap in the push uniform
address instead of writing it to the command buffer on every iteration.

Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38508>
This commit is contained in:
Faith Ekstrand 2025-11-15 18:34:46 -05:00 committed by Marge Bot
parent 3ebabe9e43
commit cbd0c9eb3b
5 changed files with 90 additions and 43 deletions

View file

@ -97,11 +97,49 @@ struct panvk_input_attachment_info {
#define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t
/* System values which are common to both graphics and compute. These are
 * always at the same offset in both graphics and compute allowing us to
 * compile the shader without knowing which queue it will be dispatched on.
 *
 * This struct is embedded as the `common` member of both
 * panvk_graphics_sysvals and panvk_compute_sysvals. Its size must stay a
 * multiple of FAU_WORD_SIZE (checked by a static_assert below) because the
 * block is addressed in whole FAU words.
 */
struct panvk_common_sysvals_inner {
/* Address of sysval/push constant buffer used for indirect loads */
aligned_u64 push_uniforms;
/* Address of the printf buffer */
aligned_u64 printf_buffer_address;
} __attribute__((aligned(FAU_WORD_SIZE)));
/* Reference layout that pins where the common sysvals live.
 *
 * _pad stands in for the 16 bytes that precede `common` in the real layouts:
 * the blend constants in panvk_graphics_sysvals, and base + _pad in
 * panvk_compute_sysvals. Both of those structs static_assert that their
 * `common` member sits at offsetof(struct panvk_common_sysvals, common).
 */
struct panvk_common_sysvals {
uint32_t _pad[4];
struct panvk_common_sysvals_inner common;
} __attribute__((aligned(FAU_WORD_SIZE)));
/* The common block is indexed in whole FAU words (see SYSVALS_COMMON_START
 * and SYSVALS_COMMON_COUNT), so both its start offset and its size must be
 * multiples of FAU_WORD_SIZE.
 */
static_assert((offsetof(struct panvk_common_sysvals, common) %
FAU_WORD_SIZE) == 0,
"panvk_common_sysvals::common must be 8-byte aligned");
static_assert((sizeof(struct panvk_common_sysvals_inner) %
FAU_WORD_SIZE) == 0,
"struct panvk_common_sysvals_inner must be 8-byte aligned");
/* Index of the first FAU word occupied by the common sysvals */
#define SYSVALS_COMMON_START \
(offsetof(struct panvk_common_sysvals, common) / FAU_WORD_SIZE)
/* Number of FAU words occupied by the common sysvals */
#define SYSVALS_COMMON_COUNT \
(sizeof(struct panvk_common_sysvals_inner) / FAU_WORD_SIZE)
/* One past the last common FAU word: the common range is the half-open
 * interval [SYSVALS_COMMON_START, SYSVALS_COMMON_END).
 */
#define SYSVALS_COMMON_END (SYSVALS_COMMON_START + SYSVALS_COMMON_COUNT)
struct panvk_graphics_sysvals {
/* Blend constants MUST come first because their position cannot depend on
* the FAU packing of the fragment shader.
*/
struct {
float constants[4];
} blend;
/* This must be at the same offset for both compute and graphics */
struct panvk_common_sysvals_inner common;
struct {
struct {
float x, y, z;
@ -117,10 +155,6 @@ struct panvk_graphics_sysvals {
uint32_t noperspective_varyings;
} vs;
/* Address of sysval/push constant buffer used for indirect loads */
aligned_u64 push_uniforms;
aligned_u64 printf_buffer_address;
struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE];
#if PAN_ARCH < 9
@ -135,11 +169,13 @@ struct panvk_graphics_sysvals {
#endif
} __attribute__((aligned(FAU_WORD_SIZE)));
static_assert(offsetof(struct panvk_graphics_sysvals, blend) == 0,
"panvk_graphics_sysvals::blend must be at the start");
static_assert(offsetof(struct panvk_graphics_sysvals, common) ==
offsetof(struct panvk_common_sysvals, common),
"Common sysvals must be at the same offset everywhere");
static_assert((sizeof(struct panvk_graphics_sysvals) % FAU_WORD_SIZE) == 0,
"struct panvk_graphics_sysvals must be 8-byte aligned");
static_assert((offsetof(struct panvk_graphics_sysvals, push_uniforms) %
FAU_WORD_SIZE) == 0,
"panvk_graphics_sysvals::push_uniforms must be 8-byte aligned");
#if PAN_ARCH < 9
static_assert((offsetof(struct panvk_graphics_sysvals, desc) % FAU_WORD_SIZE) ==
0,
@ -150,6 +186,12 @@ struct panvk_compute_sysvals {
struct {
uint32_t x, y, z;
} base;
uint32_t _pad;
/* This must be at the same offset for both compute and graphics */
struct panvk_common_sysvals_inner common;
struct {
uint32_t x, y, z;
} num_work_groups;
@ -157,10 +199,6 @@ struct panvk_compute_sysvals {
uint32_t x, y, z;
} local_group_size;
/* Address of sysval/push constant buffer used for indirect loads */
aligned_u64 push_uniforms;
aligned_u64 printf_buffer_address;
#if PAN_ARCH < 9
struct {
aligned_u64 sets[PANVK_DESC_TABLE_COMPUTE_COUNT];
@ -168,11 +206,11 @@ struct panvk_compute_sysvals {
#endif
} __attribute__((aligned(FAU_WORD_SIZE)));
static_assert(offsetof(struct panvk_compute_sysvals, common) ==
offsetof(struct panvk_common_sysvals, common),
"Common sysvals must be at the same offset everywhere");
static_assert((sizeof(struct panvk_compute_sysvals) % FAU_WORD_SIZE) == 0,
"struct panvk_compute_sysvals must be 8-byte aligned");
static_assert((offsetof(struct panvk_compute_sysvals, push_uniforms) %
FAU_WORD_SIZE) == 0,
"panvk_compute_sysvals::push_uniforms must be 8-byte aligned");
#if PAN_ARCH < 9
static_assert((offsetof(struct panvk_compute_sysvals, desc) % FAU_WORD_SIZE) ==
0,
@ -185,11 +223,27 @@ static_assert((offsetof(struct panvk_compute_sysvals, desc) % FAU_WORD_SIZE) ==
*/
#define SYSVALS_PUSH_CONST_BASE MAX_PUSH_CONSTANTS_SIZE
#define sysval_size(__ptype, __name) \
sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name)
#define common_sysval_size(__name) \
sizeof(((struct panvk_common_sysvals *)NULL)->common.__name)
#define sysval_offset(__ptype, __name) \
offsetof(struct panvk_##__ptype##_sysvals, __name)
#define graphics_sysval_size(__name) \
sizeof(((struct panvk_graphics_sysvals *)NULL)->__name)
#define compute_sysval_size(__name) \
sizeof(((struct panvk_compute_sysvals *)NULL)->__name)
#define sysval_size(__ptype, __name) __ptype##_sysval_size(__name)
#define common_sysval_offset(__name) \
offsetof(struct panvk_common_sysvals, common.__name)
#define graphics_sysval_offset(__name) \
offsetof(struct panvk_graphics_sysvals, __name)
#define compute_sysval_offset(__name) \
offsetof(struct panvk_compute_sysvals, __name)
#define sysval_offset(__ptype, __name) __ptype##_sysval_offset(__name)
#define sysval_entry_size(__ptype, __name) \
sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name[0])

View file

@ -14,7 +14,6 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
{
const struct panvk_shader_variant *cs =
panvk_shader_only_variant(cmdbuf->state.compute.shader);
const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};
@ -43,8 +42,6 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)(
cs->cs.local_size.y);
set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z,
cs->cs.local_size.z);
set_compute_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
dev->printf.bo->addr.dev);
#if PAN_ARCH < 9
struct panvk_descriptor_state *desc_state =

View file

@ -713,15 +713,12 @@ void
panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
const struct panvk_draw_info *info)
{
const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
const struct panvk_shader_variant *fs =
panvk_shader_only_variant(get_fs(cmdbuf));
uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0;
BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};
set_gfx_sysval(cmdbuf, dirty_sysvals, printf_buffer_address,
dev->printf.bo->addr.dev);
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.noperspective_varyings,
noperspective_varyings);
set_gfx_sysval(cmdbuf, dirty_sysvals, vs.first_vertex, info->vertex.base);

View file

@ -12,6 +12,7 @@ panvk_per_arch(cmd_prepare_push_uniforms)(
struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader,
uint32_t repeat_count)
{
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
uint64_t *push_ptr;
switch (shader->info.stage) {
@ -47,26 +48,32 @@ panvk_per_arch(cmd_prepare_push_uniforms)(
if (!push_uniforms.gpu)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
uint64_t *sysvals = shader->info.stage == MESA_SHADER_COMPUTE
const uint64_t *sysvals = shader->info.stage == MESA_SHADER_COMPUTE
? (uint64_t *)&cmdbuf->state.compute.sysvals
: (uint64_t *)&cmdbuf->state.gfx.sysvals;
struct panvk_common_sysvals_inner common_inner = {
.printf_buffer_address = dev->printf.bo->addr.dev,
};
uint64_t *common = (uint64_t *)&common_inner;
uint64_t *push_consts = cmdbuf->state.push_constants.data;
uint64_t *faus = push_uniforms.cpu;
uint32_t w, fau = 0;
for (uint32_t r = 0; r < repeat_count; r++) {
uint64_t addr =
common_inner.push_uniforms =
push_uniforms.gpu + r * shader->fau.total_count * sizeof(uint64_t);
if (shader->info.stage == MESA_SHADER_COMPUTE)
cmdbuf->state.compute.sysvals.push_uniforms = addr;
else
cmdbuf->state.gfx.sysvals.push_uniforms = addr;
/* After packing, the sysvals come first, followed by the user push
* constants. The ordering is encoded shader side, so don't re-order
* these loops. */
BITSET_FOREACH_SET(w, shader->fau.used_sysvals, MAX_SYSVAL_FAUS)
faus[fau++] = sysvals[w];
BITSET_FOREACH_SET(w, shader->fau.used_sysvals, MAX_SYSVAL_FAUS) {
if (w >= SYSVALS_COMMON_START && w < SYSVALS_COMMON_END)
faus[fau++] = common[w - SYSVALS_COMMON_START];
else
faus[fau++] = sysvals[w];
}
BITSET_FOREACH_SET(w, shader->fau.used_push_consts, MAX_PUSH_CONST_FAUS)
faus[fau++] = push_consts[w];

View file

@ -141,10 +141,7 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
break;
case nir_intrinsic_load_printf_buffer_address:
if (b->shader->info.stage == MESA_SHADER_COMPUTE)
val = load_sysval(b, compute, bit_size, printf_buffer_address);
else
val = load_sysval(b, graphics, bit_size, printf_buffer_address);
val = load_sysval(b, common, bit_size, printf_buffer_address);
break;
case nir_intrinsic_load_input_attachment_target_pan: {
@ -587,10 +584,7 @@ collect_push_constant(struct nir_builder *b, nir_intrinsic_instr *intr,
/* Flag the push_uniforms sysval as needed if we have an indirect offset.
*/
if (b->shader->info.stage == MESA_SHADER_COMPUTE)
shader_use_sysval(shader, compute, push_uniforms);
else
shader_use_sysval(shader, graphics, push_uniforms);
shader_use_sysval(shader, common, push_uniforms);
} else {
offset = base + nir_src_as_uint(intr->src[0]);
size = (intr->def.bit_size / 8) * intr->def.num_components;
@ -641,9 +635,7 @@ move_push_constant(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
* .base=SYSVALS_PUSH_CONST_BASE, and we're supposed to force a base of
* zero in this pass. */
unsigned push_const_buf_offset = shader_remapped_sysval_offset(
shader, b->shader->info.stage == MESA_SHADER_COMPUTE
? sysval_offset(compute, push_uniforms)
: sysval_offset(graphics, push_uniforms));
shader, sysval_offset(common, push_uniforms));
nir_def *push_const_buf = nir_load_push_constant(
b, 1, 64, nir_imm_int(b, push_const_buf_offset));
unsigned push_const_offset = is_sysval ?