diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h index 026975525cf..a498446600f 100644 --- a/src/panfrost/vulkan/panvk_shader.h +++ b/src/panfrost/vulkan/panvk_shader.h @@ -97,11 +97,49 @@ struct panvk_input_attachment_info { #define aligned_u64 __attribute__((aligned(sizeof(uint64_t)))) uint64_t +/* System values which are common to both graphics and compute. These are + * always at the same offset in both graphics and compute allowing us to + * compile the shader without knowing which queue it will be dispatched on. + */ +struct panvk_common_sysvals_inner { + /* Address of sysval/push constant buffer used for indirect loads */ + aligned_u64 push_uniforms; + + /* Address of the printf buffer */ + aligned_u64 printf_buffer_address; +} __attribute__((aligned(FAU_WORD_SIZE))); + +struct panvk_common_sysvals { + uint32_t _pad[4]; /* Stands in for the 16 bytes preceding common */ + struct panvk_common_sysvals_inner common; +} __attribute__((aligned(FAU_WORD_SIZE))); + +static_assert((offsetof(struct panvk_common_sysvals, common) % + FAU_WORD_SIZE) == 0, + "panvk_common_sysvals::common must be 8-byte aligned"); +static_assert((sizeof(struct panvk_common_sysvals_inner) % + FAU_WORD_SIZE) == 0, + "struct panvk_common_sysvals_inner must be 8-byte aligned"); + +#define SYSVALS_COMMON_START \ + (offsetof(struct panvk_common_sysvals, common) / FAU_WORD_SIZE) + +#define SYSVALS_COMMON_COUNT \ + (sizeof(struct panvk_common_sysvals_inner) / FAU_WORD_SIZE) + +#define SYSVALS_COMMON_END (SYSVALS_COMMON_START + SYSVALS_COMMON_COUNT) + struct panvk_graphics_sysvals { + /* Blend constants MUST come first because their position cannot depend on + * the FAU packing of the fragment shader. 
+ */ struct { float constants[4]; } blend; + /* This must be at the same offset for both compute and graphics */ + struct panvk_common_sysvals_inner common; + struct { struct { float x, y, z; @@ -117,10 +155,6 @@ struct panvk_graphics_sysvals { uint32_t noperspective_varyings; } vs; - /* Address of sysval/push constant buffer used for indirect loads */ - aligned_u64 push_uniforms; - aligned_u64 printf_buffer_address; - struct panvk_input_attachment_info iam[INPUT_ATTACHMENT_MAP_SIZE]; #if PAN_ARCH < 9 @@ -135,11 +169,13 @@ struct panvk_graphics_sysvals { #endif } __attribute__((aligned(FAU_WORD_SIZE))); +static_assert(offsetof(struct panvk_graphics_sysvals, blend) == 0, + "panvk_graphics_sysvals::blend must be at the start"); +static_assert(offsetof(struct panvk_graphics_sysvals, common) == + offsetof(struct panvk_common_sysvals, common), + "Common sysvals must be at the same offset everywhere"); static_assert((sizeof(struct panvk_graphics_sysvals) % FAU_WORD_SIZE) == 0, "struct panvk_graphics_sysvals must be 8-byte aligned"); -static_assert((offsetof(struct panvk_graphics_sysvals, push_uniforms) % - FAU_WORD_SIZE) == 0, - "panvk_graphics_sysvals::push_uniforms must be 8-byte aligned"); #if PAN_ARCH < 9 static_assert((offsetof(struct panvk_graphics_sysvals, desc) % FAU_WORD_SIZE) == 0, @@ -150,6 +186,12 @@ struct panvk_compute_sysvals { struct { uint32_t x, y, z; } base; + + uint32_t _pad; + + /* This must be at the same offset for both compute and graphics */ + struct panvk_common_sysvals_inner common; + struct { uint32_t x, y, z; } num_work_groups; @@ -157,10 +199,6 @@ struct panvk_compute_sysvals { uint32_t x, y, z; } local_group_size; - /* Address of sysval/push constant buffer used for indirect loads */ - aligned_u64 push_uniforms; - aligned_u64 printf_buffer_address; - #if PAN_ARCH < 9 struct { aligned_u64 sets[PANVK_DESC_TABLE_COMPUTE_COUNT]; @@ -168,11 +206,11 @@ struct panvk_compute_sysvals { #endif } __attribute__((aligned(FAU_WORD_SIZE))); 
+static_assert(offsetof(struct panvk_compute_sysvals, common) == + offsetof(struct panvk_common_sysvals, common), + "Common sysvals must be at the same offset everywhere"); static_assert((sizeof(struct panvk_compute_sysvals) % FAU_WORD_SIZE) == 0, "struct panvk_compute_sysvals must be 8-byte aligned"); -static_assert((offsetof(struct panvk_compute_sysvals, push_uniforms) % - FAU_WORD_SIZE) == 0, - "panvk_compute_sysvals::push_uniforms must be 8-byte aligned"); #if PAN_ARCH < 9 static_assert((offsetof(struct panvk_compute_sysvals, desc) % FAU_WORD_SIZE) == 0, @@ -185,11 +223,27 @@ static_assert((offsetof(struct panvk_compute_sysvals, desc) % FAU_WORD_SIZE) == */ #define SYSVALS_PUSH_CONST_BASE MAX_PUSH_CONSTANTS_SIZE -#define sysval_size(__ptype, __name) \ - sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name) +#define common_sysval_size(__name) \ + sizeof(((struct panvk_common_sysvals *)NULL)->common.__name) -#define sysval_offset(__ptype, __name) \ - offsetof(struct panvk_##__ptype##_sysvals, __name) +#define graphics_sysval_size(__name) \ + sizeof(((struct panvk_graphics_sysvals *)NULL)->__name) + +#define compute_sysval_size(__name) \ + sizeof(((struct panvk_compute_sysvals *)NULL)->__name) + +#define sysval_size(__ptype, __name) __ptype##_sysval_size(__name) + +#define common_sysval_offset(__name) \ + offsetof(struct panvk_common_sysvals, common.__name) + +#define graphics_sysval_offset(__name) \ + offsetof(struct panvk_graphics_sysvals, __name) + +#define compute_sysval_offset(__name) \ + offsetof(struct panvk_compute_sysvals, __name) + +#define sysval_offset(__ptype, __name) __ptype##_sysval_offset(__name) #define sysval_entry_size(__ptype, __name) \ sizeof(((struct panvk_##__ptype##_sysvals *)NULL)->__name[0]) diff --git a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c index 917f33ef38a..d86467d15ff 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c @@ 
-14,7 +14,6 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)( { const struct panvk_shader_variant *cs = panvk_shader_only_variant(cmdbuf->state.compute.shader); - const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0}; @@ -43,8 +42,6 @@ panvk_per_arch(cmd_prepare_dispatch_sysvals)( cs->cs.local_size.y); set_compute_sysval(cmdbuf, dirty_sysvals, local_group_size.z, cs->cs.local_size.z); - set_compute_sysval(cmdbuf, dirty_sysvals, printf_buffer_address, - dev->printf.bo->addr.dev); #if PAN_ARCH < 9 struct panvk_descriptor_state *desc_state = diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index 10cc8444cec..ab6bbe55d8a 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -713,15 +713,12 @@ void panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf, const struct panvk_draw_info *info) { - const struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb; const struct panvk_shader_variant *fs = panvk_shader_only_variant(get_fs(cmdbuf)); uint32_t noperspective_varyings = fs ? 
fs->info.varyings.noperspective : 0; BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0}; - set_gfx_sysval(cmdbuf, dirty_sysvals, printf_buffer_address, - dev->printf.bo->addr.dev); set_gfx_sysval(cmdbuf, dirty_sysvals, vs.noperspective_varyings, noperspective_varyings); set_gfx_sysval(cmdbuf, dirty_sysvals, vs.first_vertex, info->vertex.base); diff --git a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c index 72929975ba6..9428bc9f950 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_push_constant.c @@ -12,6 +12,7 @@ panvk_per_arch(cmd_prepare_push_uniforms)( struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader_variant *shader, uint32_t repeat_count) { + struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); uint64_t *push_ptr; switch (shader->info.stage) { @@ -47,26 +48,32 @@ panvk_per_arch(cmd_prepare_push_uniforms)( if (!push_uniforms.gpu) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - uint64_t *sysvals = shader->info.stage == MESA_SHADER_COMPUTE + const uint64_t *sysvals = shader->info.stage == MESA_SHADER_COMPUTE ? (uint64_t *)&cmdbuf->state.compute.sysvals : (uint64_t *)&cmdbuf->state.gfx.sysvals; + + struct panvk_common_sysvals_inner common_inner = { + .printf_buffer_address = dev->printf.bo->addr.dev, + }; + uint64_t *common = (uint64_t *)&common_inner; + uint64_t *push_consts = cmdbuf->state.push_constants.data; uint64_t *faus = push_uniforms.cpu; uint32_t w, fau = 0; for (uint32_t r = 0; r < repeat_count; r++) { - uint64_t addr = + common_inner.push_uniforms = push_uniforms.gpu + r * shader->fau.total_count * sizeof(uint64_t); - if (shader->info.stage == MESA_SHADER_COMPUTE) - cmdbuf->state.compute.sysvals.push_uniforms = addr; - else - cmdbuf->state.gfx.sysvals.push_uniforms = addr; /* After packing, the sysvals come first, followed by the user push * constants. The ordering is encoded shader side, so don't re-order * these loops. 
*/ - BITSET_FOREACH_SET(w, shader->fau.used_sysvals, MAX_SYSVAL_FAUS) - faus[fau++] = sysvals[w]; + BITSET_FOREACH_SET(w, shader->fau.used_sysvals, MAX_SYSVAL_FAUS) { + if (w >= SYSVALS_COMMON_START && w < SYSVALS_COMMON_END) + faus[fau++] = common[w - SYSVALS_COMMON_START]; + else + faus[fau++] = sysvals[w]; + } BITSET_FOREACH_SET(w, shader->fau.used_push_consts, MAX_PUSH_CONST_FAUS) faus[fau++] = push_consts[w]; diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index 8e04d789904..2f4f0b28130 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -141,10 +141,7 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data) break; case nir_intrinsic_load_printf_buffer_address: - if (b->shader->info.stage == MESA_SHADER_COMPUTE) - val = load_sysval(b, compute, bit_size, printf_buffer_address); - else - val = load_sysval(b, graphics, bit_size, printf_buffer_address); + val = load_sysval(b, common, bit_size, printf_buffer_address); break; case nir_intrinsic_load_input_attachment_target_pan: { @@ -587,10 +584,7 @@ collect_push_constant(struct nir_builder *b, nir_intrinsic_instr *intr, /* Flag the push_uniforms sysval as needed if we have an indirect offset. */ - if (b->shader->info.stage == MESA_SHADER_COMPUTE) - shader_use_sysval(shader, compute, push_uniforms); - else - shader_use_sysval(shader, graphics, push_uniforms); + shader_use_sysval(shader, common, push_uniforms); } else { offset = base + nir_src_as_uint(intr->src[0]); size = (intr->def.bit_size / 8) * intr->def.num_components; @@ -641,9 +635,7 @@ move_push_constant(struct nir_builder *b, nir_intrinsic_instr *intr, void *data) * .base=SYSVALS_PUSH_CONST_BASE, and we're supposed to force a base of * zero in this pass. */ unsigned push_const_buf_offset = shader_remapped_sysval_offset( - shader, b->shader->info.stage == MESA_SHADER_COMPUTE - ? 
sysval_offset(compute, push_uniforms) - : sysval_offset(graphics, push_uniforms)); + shader, sysval_offset(common, push_uniforms)); nir_def *push_const_buf = nir_load_push_constant( b, 1, 64, nir_imm_int(b, push_const_buf_offset)); unsigned push_const_offset = is_sysval ?