mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
panvk: Extend the shader logic to support Valhall
Co-developed-by: Boris Brezillon <boris.brezillon@collabora.com> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30736>
This commit is contained in:
parent
e350c334b6
commit
27beadcbdb
3 changed files with 189 additions and 3 deletions
|
|
@ -81,6 +81,7 @@ foreach arch : [6, 7, 9, 10]
|
|||
'panvk_vX_descriptor_set.c',
|
||||
'panvk_vX_descriptor_set_layout.c',
|
||||
'panvk_vX_nir_lower_descriptors.c',
|
||||
'panvk_vX_shader.c',
|
||||
]
|
||||
else
|
||||
per_arch_files = common_per_arch_files
|
||||
|
|
|
|||
|
|
@ -149,7 +149,19 @@ struct panvk_shader {
|
|||
uint32_t bin_size;
|
||||
|
||||
struct panvk_priv_mem code_mem;
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
struct panvk_priv_mem rsd;
|
||||
#else
|
||||
union {
|
||||
struct panvk_priv_mem spd;
|
||||
struct {
|
||||
struct panvk_priv_mem pos_points;
|
||||
struct panvk_priv_mem pos_triangles;
|
||||
struct panvk_priv_mem var;
|
||||
} spds;
|
||||
};
|
||||
#endif
|
||||
|
||||
const char *nir_str;
|
||||
const char *asm_str;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,11 @@
|
|||
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
/* FIXME: make the include statement unconditional when the CSF command buffer
|
||||
* logic is implemented. */
|
||||
#if PAN_ARCH <= 7
|
||||
#include "panvk_cmd_buffer.h"
|
||||
#endif
|
||||
#include "panvk_device.h"
|
||||
#include "panvk_instance.h"
|
||||
#include "panvk_mempool.h"
|
||||
|
|
@ -122,11 +126,13 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
|
|||
bit_size, num_comps);
|
||||
break;
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
case nir_intrinsic_load_layer_id:
|
||||
assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
val = load_sysval_from_push_const(b, SYSVAL(graphics, layer_id), bit_size,
|
||||
num_comps);
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
return false;
|
||||
|
|
@ -138,6 +144,7 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
|
|||
return true;
|
||||
}
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
static bool
|
||||
lower_gl_pos_layer_writes(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
|
|
@ -213,6 +220,7 @@ lower_layer_writes(nir_shader *nir)
|
|||
nir, lower_gl_pos_layer_writes,
|
||||
nir_metadata_block_index | nir_metadata_dominance, temp_layer_var);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
|
||||
|
|
@ -232,7 +240,8 @@ panvk_buffer_ubo_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)
|
|||
case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
|
||||
case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
|
||||
case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
|
||||
return nir_address_format_32bit_index_offset;
|
||||
return PAN_ARCH <= 7 ? nir_address_format_32bit_index_offset
|
||||
: nir_address_format_vec2_index_32bit_offset;
|
||||
default:
|
||||
unreachable("Invalid robust buffer access behavior");
|
||||
}
|
||||
|
|
@ -243,10 +252,12 @@ panvk_buffer_ssbo_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)
|
|||
{
|
||||
switch (robustness) {
|
||||
case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
|
||||
return nir_address_format_64bit_global_32bit_offset;
|
||||
return PAN_ARCH <= 7 ? nir_address_format_64bit_global_32bit_offset
|
||||
: nir_address_format_vec2_index_32bit_offset;
|
||||
case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
|
||||
case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
|
||||
return nir_address_format_64bit_bounded_global;
|
||||
return PAN_ARCH <= 7 ? nir_address_format_64bit_bounded_global
|
||||
: nir_address_format_vec2_index_32bit_offset;
|
||||
default:
|
||||
unreachable("Invalid robust buffer access behavior");
|
||||
}
|
||||
|
|
@ -282,9 +293,11 @@ panvk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev, nir_shader *nir)
|
|||
NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir),
|
||||
true, true);
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
/* This needs to be done just after the io_to_temporaries pass, because we
|
||||
* rely on in/out temporaries to collect the final layer_id value. */
|
||||
NIR_PASS_V(nir, lower_layer_writes);
|
||||
#endif
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_indirect_derefs,
|
||||
nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
|
||||
|
|
@ -361,6 +374,36 @@ panvk_hash_graphics_state(struct vk_physical_device *device,
|
|||
_mesa_blake3_final(&blake3_ctx, blake3_out);
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
static bool
|
||||
valhall_pack_buf_idx(nir_builder *b, nir_instr *instr, UNUSED void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_load_ubo &&
|
||||
intrin->intrinsic != nir_intrinsic_load_ssbo)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def *index = intrin->src[0].ssa;
|
||||
|
||||
/* The valhall backend expects nir_address_format_32bit_index_offset,
|
||||
* but address mode is nir_address_format_vec2_index_32bit_offset to allow
|
||||
* us to store the array size, set and index without losing information
|
||||
* while walking the descriptor deref chain (needed to do a bound check on
|
||||
* the array index when we reach the end of the chain).
|
||||
* Turn it back to nir_address_format_32bit_index_offset after IOs
|
||||
* have been lowered. */
|
||||
nir_def *packed_index =
|
||||
nir_iadd(b, nir_channel(b, index, 0), nir_channel(b, index, 1));
|
||||
nir_src_rewrite(&intrin->src[0], packed_index);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
|
||||
uint32_t set_layout_count,
|
||||
|
|
@ -388,6 +431,11 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
|
|||
NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
|
||||
nir_address_format_64bit_global);
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
NIR_PASS_V(nir, nir_shader_instructions_pass, valhall_pack_buf_idx,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
#endif
|
||||
|
||||
if (gl_shader_stage_uses_workgroup(stage)) {
|
||||
if (!nir->info.shared_memory_explicit_layout) {
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
|
||||
|
|
@ -491,6 +539,7 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir,
|
|||
shader->asm_str = asm_str;
|
||||
}
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
/* Patch the descriptor count */
|
||||
shader->info.ubo_count =
|
||||
shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_UBO] +
|
||||
|
|
@ -527,6 +576,7 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir,
|
|||
shader->info.attribute_count =
|
||||
shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_IMG] +
|
||||
(nir->info.stage == MESA_SHADER_VERTEX ? MAX_VS_ATTRIBS : 0);
|
||||
#endif
|
||||
|
||||
shader->local_size.x = nir->info.workgroup_size[0];
|
||||
shader->local_size.y = nir->info.workgroup_size[1];
|
||||
|
|
@ -535,12 +585,36 @@ panvk_compile_nir(struct panvk_device *dev, nir_shader *nir,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
static enum mali_flush_to_zero_mode
|
||||
shader_ftz_mode(struct panvk_shader *shader)
|
||||
{
|
||||
if (shader->info.ftz_fp32) {
|
||||
if (shader->info.ftz_fp16)
|
||||
return MALI_FLUSH_TO_ZERO_MODE_ALWAYS;
|
||||
else
|
||||
return MALI_FLUSH_TO_ZERO_MODE_DX11;
|
||||
} else {
|
||||
/* We don't have a "flush FP16, preserve FP32" mode, but APIs
|
||||
* should not be able to generate that.
|
||||
*/
|
||||
assert(!shader->info.ftz_fp16 && !shader->info.ftz_fp32);
|
||||
return MALI_FLUSH_TO_ZERO_MODE_PRESERVE_SUBNORMALS;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static VkResult
|
||||
panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
shader->code_mem = (struct panvk_priv_mem){0};
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
shader->rsd = (struct panvk_priv_mem){0};
|
||||
#else
|
||||
shader->spd = (struct panvk_priv_mem){0};
|
||||
#endif
|
||||
|
||||
if (!shader->bin_size)
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -548,6 +622,7 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
|
|||
shader->code_mem = panvk_pool_upload_aligned(
|
||||
&dev->mempools.exec, shader->bin_ptr, shader->bin_size, 128);
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
if (shader->info.stage == MESA_SHADER_FRAGMENT)
|
||||
return VK_SUCCESS;
|
||||
|
||||
|
|
@ -557,6 +632,73 @@ panvk_shader_upload(struct panvk_device *dev, struct panvk_shader *shader,
|
|||
pan_shader_prepare_rsd(&shader->info, panvk_shader_get_dev_addr(shader),
|
||||
&cfg);
|
||||
}
|
||||
#else
|
||||
if (shader->info.stage != MESA_SHADER_VERTEX) {
|
||||
shader->spd = panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
|
||||
|
||||
pan_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, cfg) {
|
||||
cfg.stage = pan_shader_stage(&shader->info);
|
||||
|
||||
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
|
||||
cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
|
||||
else if (cfg.stage == MALI_SHADER_STAGE_VERTEX)
|
||||
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
|
||||
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(shader->info.work_reg_count);
|
||||
cfg.binary = panvk_shader_get_dev_addr(shader);
|
||||
cfg.preload.r48_r63 = (shader->info.preload >> 48);
|
||||
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
|
||||
|
||||
if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT)
|
||||
cfg.requires_helper_threads = shader->info.contains_barrier;
|
||||
}
|
||||
} else {
|
||||
shader->spds.pos_points =
|
||||
panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
|
||||
pan_pack(panvk_priv_mem_host_addr(shader->spds.pos_points),
|
||||
SHADER_PROGRAM, cfg) {
|
||||
cfg.stage = pan_shader_stage(&shader->info);
|
||||
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(shader->info.work_reg_count);
|
||||
cfg.binary = panvk_shader_get_dev_addr(shader);
|
||||
cfg.preload.r48_r63 = (shader->info.preload >> 48);
|
||||
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
|
||||
}
|
||||
|
||||
shader->spds.pos_triangles =
|
||||
panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
|
||||
pan_pack(panvk_priv_mem_host_addr(shader->spds.pos_triangles),
|
||||
SHADER_PROGRAM, cfg) {
|
||||
cfg.stage = pan_shader_stage(&shader->info);
|
||||
cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF;
|
||||
cfg.register_allocation =
|
||||
pan_register_allocation(shader->info.work_reg_count);
|
||||
cfg.binary =
|
||||
panvk_shader_get_dev_addr(shader) + shader->info.vs.no_psiz_offset;
|
||||
cfg.preload.r48_r63 = (shader->info.preload >> 48);
|
||||
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
|
||||
}
|
||||
|
||||
if (shader->info.vs.secondary_enable) {
|
||||
shader->spds.var =
|
||||
panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
|
||||
pan_pack(panvk_priv_mem_host_addr(shader->spds.var), SHADER_PROGRAM,
|
||||
cfg) {
|
||||
unsigned work_count = shader->info.vs.secondary_work_reg_count;
|
||||
|
||||
cfg.stage = pan_shader_stage(&shader->info);
|
||||
cfg.vertex_warp_limit = MALI_WARP_LIMIT_FULL;
|
||||
cfg.register_allocation = pan_register_allocation(work_count);
|
||||
cfg.binary = panvk_shader_get_dev_addr(shader) +
|
||||
shader->info.vs.secondary_offset;
|
||||
cfg.preload.r48_r63 = (shader->info.vs.secondary_preload >> 48);
|
||||
cfg.flush_to_zero_mode = shader_ftz_mode(shader);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
@ -573,8 +715,13 @@ panvk_shader_destroy(struct vk_device *vk_dev, struct vk_shader *vk_shader,
|
|||
ralloc_free((void *)shader->nir_str);
|
||||
|
||||
panvk_pool_free_mem(&dev->mempools.exec, shader->code_mem);
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
panvk_pool_free_mem(&dev->mempools.exec, shader->rsd);
|
||||
panvk_pool_free_mem(&dev->mempools.exec, shader->desc_info.others.map);
|
||||
#else
|
||||
panvk_pool_free_mem(&dev->mempools.exec, shader->spd);
|
||||
#endif
|
||||
|
||||
free((void *)shader->bin_ptr);
|
||||
vk_shader_free(&dev->vk, pAllocator, &shader->vk);
|
||||
|
|
@ -677,6 +824,8 @@ shader_desc_info_deserialize(struct blob_reader *blob,
|
|||
struct panvk_shader *shader)
|
||||
{
|
||||
shader->desc_info.used_set_mask = blob_read_uint32(blob);
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
shader->desc_info.dyn_ubos.count = blob_read_uint32(blob);
|
||||
blob_copy_bytes(blob, shader->desc_info.dyn_ubos.map,
|
||||
shader->desc_info.dyn_ubos.count);
|
||||
|
|
@ -706,6 +855,11 @@ shader_desc_info_deserialize(struct blob_reader *blob,
|
|||
|
||||
blob_copy_bytes(blob, copy_table, others_count * sizeof(*copy_table));
|
||||
}
|
||||
#else
|
||||
shader->desc_info.dyn_bufs.count = blob_read_uint32(blob);
|
||||
blob_copy_bytes(blob, shader->desc_info.dyn_bufs.map,
|
||||
shader->desc_info.dyn_bufs.count);
|
||||
#endif
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
@ -776,6 +930,8 @@ static void
|
|||
shader_desc_info_serialize(struct blob *blob, const struct panvk_shader *shader)
|
||||
{
|
||||
blob_write_uint32(blob, shader->desc_info.used_set_mask);
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
blob_write_uint32(blob, shader->desc_info.dyn_ubos.count);
|
||||
blob_write_bytes(blob, shader->desc_info.dyn_ubos.map,
|
||||
sizeof(*shader->desc_info.dyn_ubos.map) *
|
||||
|
|
@ -794,6 +950,12 @@ shader_desc_info_serialize(struct blob *blob, const struct panvk_shader *shader)
|
|||
blob_write_bytes(blob,
|
||||
panvk_priv_mem_host_addr(shader->desc_info.others.map),
|
||||
sizeof(uint32_t) * others_count);
|
||||
#else
|
||||
blob_write_uint32(blob, shader->desc_info.dyn_bufs.count);
|
||||
blob_write_bytes(blob, shader->desc_info.dyn_bufs.map,
|
||||
sizeof(*shader->desc_info.dyn_bufs.map) *
|
||||
shader->desc_info.dyn_bufs.count);
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -1070,6 +1232,12 @@ panvk_per_arch(link_shaders)(struct panvk_pool *desc_pool,
|
|||
assert(vs);
|
||||
assert(vs->info.stage == MESA_SHADER_VERTEX);
|
||||
|
||||
if (PAN_ARCH >= 9) {
|
||||
link->buf_strides[PANVK_VARY_BUF_GENERAL] =
|
||||
MAX2(fs->info.varyings.input_count, vs->info.varyings.output_count);
|
||||
return;
|
||||
}
|
||||
|
||||
collect_varyings_info(vs->info.varyings.output,
|
||||
vs->info.varyings.output_count, &out_vars);
|
||||
|
||||
|
|
@ -1157,5 +1325,10 @@ const struct vk_device_shader_ops panvk_per_arch(device_shader_ops) = {
|
|||
.compile = panvk_compile_shaders,
|
||||
.deserialize = panvk_deserialize_shader,
|
||||
.cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
|
||||
|
||||
/* FIXME: make the assignment unconditional when the CSF command buffer logic is
|
||||
* implemented. */
|
||||
#if PAN_ARCH <= 7
|
||||
.cmd_bind_shaders = panvk_per_arch(cmd_bind_shaders),
|
||||
#endif
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue