panvk: Use a flat sysvals struct

PanVK uses fewer sysvals than the GLES driver, as some data that would
be a sysval in GLES is instead part of the descriptor set or the pipeline
state in Vulkan. Therefore, it is simpler and more efficient to use a
flat, fixed layout provided by the driver for our sysvals, rather than
the compiler choosing a layout.

This commit switches to a flat sysval layout.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16276>
This commit is contained in:
Jason Ekstrand 2022-04-28 17:02:20 -05:00 committed by Marge Bot
parent e6091cc578
commit c32ddb5e77
8 changed files with 176 additions and 128 deletions

View file

@ -88,6 +88,37 @@ panvk_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
}
}
static void
panvk_set_ssbo_pointers(struct panvk_descriptor_state *desc_state,
unsigned ssbo_offset,
unsigned dyn_ssbo_offset,
struct panvk_descriptor_set *set)
{
struct panvk_sysvals *sysvals = &desc_state->sysvals;
unsigned ssbo_idx = ssbo_offset;
for (unsigned i = 0; i < set->layout->num_ssbos; i++) {
const struct panvk_buffer_desc *ssbo = &set->ssbos[i];
sysvals->ssbos[ssbo_idx++] = (struct panvk_ssbo_addr) {
.base_addr = panvk_buffer_gpu_ptr(ssbo->buffer, ssbo->offset),
.size = panvk_buffer_range(ssbo->buffer, ssbo->offset, ssbo->size),
};
}
for (unsigned i = 0; i < set->layout->num_dyn_ssbos; i++) {
const struct panvk_buffer_desc *ssbo =
&desc_state->dyn.ssbos[dyn_ssbo_offset + i];
sysvals->ssbos[ssbo_idx++] = (struct panvk_ssbo_addr) {
.base_addr = panvk_buffer_gpu_ptr(ssbo->buffer, ssbo->offset),
.size = panvk_buffer_range(ssbo->buffer, ssbo->offset, ssbo->size),
};
}
desc_state->sysvals_ptr = 0;
}
void
panvk_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
@ -134,6 +165,13 @@ panvk_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
}
}
if (set->layout->num_ssbos || set->layout->num_dyn_ssbos) {
panvk_set_ssbo_pointers(descriptors_state,
playout->sets[idx].ssbo_offset,
playout->sets[idx].dyn_ssbo_offset,
set);
}
if (set->layout->num_ssbos || set->layout->num_dyn_ssbos)
descriptors_state->dirty |= PANVK_DYNAMIC_SSBO;
@ -195,23 +233,24 @@ panvk_CmdBindPipeline(VkCommandBuffer commandBuffer,
cmdbuf->bind_points[pipelineBindPoint].pipeline = pipeline;
cmdbuf->state.fs_rsd = 0;
memset(cmdbuf->bind_points[pipelineBindPoint].desc_state.sysvals, 0,
sizeof(cmdbuf->bind_points[0].desc_state.sysvals));
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
cmdbuf->state.varyings = pipeline->varyings;
if (!(pipeline->dynamic_state_mask & BITFIELD_BIT(VK_DYNAMIC_STATE_VIEWPORT)))
if (!(pipeline->dynamic_state_mask & BITFIELD_BIT(VK_DYNAMIC_STATE_VIEWPORT))) {
cmdbuf->state.viewport = pipeline->viewport;
if (!(pipeline->dynamic_state_mask & BITFIELD_BIT(VK_DYNAMIC_STATE_SCISSOR)))
cmdbuf->state.dirty |= PANVK_DYNAMIC_VIEWPORT;
}
if (!(pipeline->dynamic_state_mask & BITFIELD_BIT(VK_DYNAMIC_STATE_SCISSOR))) {
cmdbuf->state.scissor = pipeline->scissor;
cmdbuf->state.dirty |= PANVK_DYNAMIC_SCISSOR;
}
}
/* Sysvals are passed through UBOs, so we need to dirty the UBO array if the
* pipeline contains shaders using sysvals.
*/
if (pipeline->num_sysvals)
cmdbuf->bind_points[pipelineBindPoint].desc_state.ubos = 0;
cmdbuf->bind_points[pipelineBindPoint].desc_state.ubos = 0;
}
void

View file

@ -41,7 +41,7 @@
*/
void
panvk_sysval_upload_viewport_scale(const VkViewport *viewport,
union panvk_sysval_data *data)
union panvk_sysval_vec4 *data)
{
data->f32[0] = 0.5f * viewport->width;
data->f32[1] = 0.5f * viewport->height;
@ -59,7 +59,7 @@ panvk_sysval_upload_viewport_scale(const VkViewport *viewport,
*/
void
panvk_sysval_upload_viewport_offset(const VkViewport *viewport,
union panvk_sysval_data *data)
union panvk_sysval_vec4 *data)
{
data->f32[0] = (0.5f * viewport->width) + viewport->x;
data->f32[1] = (0.5f * viewport->height) + viewport->y;

View file

@ -85,10 +85,10 @@ panvk_per_arch(translate_sampler_compare_func)(const VkSamplerCreateInfo *pCreat
void
panvk_sysval_upload_viewport_scale(const VkViewport *viewport,
union panvk_sysval_data *data);
union panvk_sysval_vec4 *data);
void
panvk_sysval_upload_viewport_offset(const VkViewport *viewport,
union panvk_sysval_data *data);
union panvk_sysval_vec4 *data);
#endif

View file

@ -569,14 +569,57 @@ enum panvk_dynamic_state_bits {
PANVK_DYNAMIC_ALL = (1 << 12) - 1,
};
/* Address/size record describing one SSBO inside the sysvals block.
 *
 * This has to match nir_address_format_64bit_bounded_global */
struct panvk_ssbo_addr {
/* Start address, as returned by panvk_buffer_gpu_ptr() */
uint64_t base_addr;
/* Accessible range, as returned by panvk_buffer_range() */
uint32_t size;
uint32_t zero; /* Must be zero! */
};
/* One four-component sysval slot, viewable either as floats or as 32-bit
 * unsigned integers. Both views alias the same storage.
 */
union panvk_sysval_vec4 {
float f32[4];
uint32_t u32[4];
};
/* Flat, driver-defined sysval layout.
 *
 * A copy of this struct lives in panvk_descriptor_state and is copied
 * verbatim into GPU-visible memory when the sysvals are prepared.
 * panvk_init_sysvals() derives each sysval's slot index from
 * offsetof(member) / 16, so every member that is exposed as a sysval must
 * start on a 16-byte boundary.
 */
struct panvk_sysvals {
/* Graphics and compute sysvals overlay the same storage at the start of
 * the struct. */
union {
struct {
/* Only for graphics */
union panvk_sysval_vec4 viewport_scale;
union panvk_sysval_vec4 viewport_offset;
/* Draw offsets: one vec4 slot, also addressable field by field */
union {
union panvk_sysval_vec4 vertex_instance_offsets;
struct {
uint32_t first_vertex;
uint32_t base_vertex;
uint32_t base_instance;
};
};
union panvk_sysval_vec4 blend_constants;
};
struct {
/* Only for compute */
union panvk_sysval_vec4 num_work_groups;
union panvk_sysval_vec4 local_group_size;
};
};
/* This will be gone in the next commit */
#define MAX_SSBOS 28
/* SSBO address table, filled in by panvk_set_ssbo_pointers() */
struct panvk_ssbo_addr ssbos[MAX_SSBOS];
};
struct panvk_descriptor_state {
uint32_t dirty;
const struct panvk_descriptor_set *sets[MAX_SETS];
struct panvk_sysvals sysvals;
struct {
struct panvk_buffer_desc ubos[MAX_DYNAMIC_UNIFORM_BUFFERS];
struct panvk_buffer_desc ssbos[MAX_DYNAMIC_STORAGE_BUFFERS];
} dyn;
mali_ptr sysvals[MESA_SHADER_STAGES];
mali_ptr sysvals_ptr;
mali_ptr ubos;
mali_ptr textures;
mali_ptr samplers;
@ -715,10 +758,6 @@ struct panvk_cmd_state {
bool crc_valid[MAX_RTS];
} fb;
struct {
struct pan_compute_dim wg_count;
} compute;
const struct panvk_render_pass *pass;
const struct panvk_subpass *subpass;
const struct panvk_framebuffer *framebuffer;
@ -836,13 +875,6 @@ panvk_shader_destroy(struct panvk_device *dev,
struct panvk_shader *shader,
const VkAllocationCallbacks *alloc);
union panvk_sysval_data {
float f32[4];
double f64[2];
uint32_t u32[4];
uint64_t u64[2];
};
#define RSD_WORDS 16
#define BLEND_DESC_WORDS 4
@ -868,7 +900,6 @@ struct panvk_pipeline {
uint32_t img_access_mask;
unsigned num_ubos;
unsigned num_sysvals;
struct {
unsigned ubo_idx;

View file

@ -312,75 +312,34 @@ panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
}
static void
panvk_sysval_upload_ssbo_info(struct panvk_cmd_buffer *cmdbuf,
unsigned ssbo_id,
struct panvk_cmd_bind_point_state *bind_point_state,
union panvk_sysval_data *data)
panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state,
struct panvk_draw_info *draw)
{
const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
const struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
struct panvk_sysvals *sysvals = &bind_point_state->desc_state.sysvals;
for (unsigned s = 0; s < pipeline->layout->num_sets; s++) {
unsigned ssbo_offset = pipeline->layout->sets[s].ssbo_offset;
unsigned num_ssbos = pipeline->layout->sets[s].layout->num_ssbos;
unsigned dyn_ssbo_offset = pipeline->layout->sets[s].dyn_ssbo_offset + pipeline->layout->num_ssbos;
unsigned num_dyn_ssbos = pipeline->layout->sets[s].layout->num_dyn_ssbos;
const struct panvk_buffer_desc *ssbo = NULL;
if (ssbo_id >= ssbo_offset && ssbo_id < (ssbo_offset + num_ssbos))
ssbo = &desc_state->sets[s]->ssbos[ssbo_id - ssbo_offset];
else if (ssbo_id >= dyn_ssbo_offset && ssbo_id < (dyn_ssbo_offset + num_dyn_ssbos))
ssbo = &desc_state->dyn.ssbos[ssbo_id - pipeline->layout->num_ssbos];
if (ssbo) {
data->u64[0] = panvk_buffer_gpu_ptr(ssbo->buffer, ssbo->offset);
data->u32[2] = panvk_buffer_range(ssbo->buffer,
ssbo->offset, ssbo->size);
}
unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
if (sysvals->first_vertex != draw->offset_start ||
sysvals->base_vertex != base_vertex ||
sysvals->base_instance != draw->first_instance) {
sysvals->first_vertex = draw->offset_start;
sysvals->base_vertex = base_vertex;
sysvals->base_instance = draw->first_instance;
bind_point_state->desc_state.sysvals_ptr = 0;
}
}
static void
panvk_cmd_upload_sysval(struct panvk_cmd_buffer *cmdbuf,
unsigned id,
struct panvk_cmd_bind_point_state *bind_point_state,
union panvk_sysval_data *data)
{
switch (PAN_SYSVAL_TYPE(id)) {
case PAN_SYSVAL_VIEWPORT_SCALE:
panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport, data);
break;
case PAN_SYSVAL_VIEWPORT_OFFSET:
panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, data);
break;
case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
data->u32[0] = cmdbuf->state.ib.first_vertex;
data->u32[1] = cmdbuf->state.ib.base_vertex;
data->u32[2] = cmdbuf->state.ib.base_instance;
break;
case PAN_SYSVAL_BLEND_CONSTANTS:
memcpy(data->f32, cmdbuf->state.blend.constants, sizeof(data->f32));
break;
case PAN_SYSVAL_SSBO:
/* This won't work with dynamic SSBO indexing. We might want to
* consider storing SSBO mappings in a separate UBO if we need to
* support
* VkPhysicalDeviceVulkan12Features.shaderStorageBufferArrayNonUniformIndexing.
*/
panvk_sysval_upload_ssbo_info(cmdbuf, PAN_SYSVAL_ID(id), bind_point_state, data);
break;
case PAN_SYSVAL_NUM_WORK_GROUPS:
data->u32[0] = cmdbuf->state.compute.wg_count.x;
data->u32[1] = cmdbuf->state.compute.wg_count.y;
data->u32[2] = cmdbuf->state.compute.wg_count.z;
break;
case PAN_SYSVAL_LOCAL_GROUP_SIZE:
data->u32[0] = bind_point_state->pipeline->cs.local_size.x;
data->u32[1] = bind_point_state->pipeline->cs.local_size.y;
data->u32[2] = bind_point_state->pipeline->cs.local_size.z;
break;
default:
unreachable("Invalid static sysval");
if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) {
memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants,
sizeof(cmdbuf->state.blend.constants));
bind_point_state->desc_state.sysvals_ptr = 0;
}
if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) {
panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport,
&sysvals->viewport_scale);
panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport,
&sysvals->viewport_offset);
bind_point_state->desc_state.sysvals_ptr = 0;
}
}
@ -389,29 +348,15 @@ panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state)
{
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
if (!pipeline->num_sysvals)
if (desc_state->sysvals_ptr)
return;
uint32_t dirty = cmdbuf->state.dirty | desc_state->dirty;
for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sysvals); i++) {
unsigned sysval_count = pipeline->sysvals[i].ids.sysval_count;
if (!sysval_count || desc_state->sysvals[i])
continue;
struct panfrost_ptr sysvals =
pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, sysval_count * 16, 16);
union panvk_sysval_data *data = sysvals.cpu;
for (unsigned s = 0; s < pipeline->sysvals[i].ids.sysval_count; s++) {
panvk_cmd_upload_sysval(cmdbuf, pipeline->sysvals[i].ids.sysvals[s],
bind_point_state, &data[s]);
}
desc_state->sysvals[i] = sysvals.gpu;
}
struct panfrost_ptr sysvals =
pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
sizeof(desc_state->sysvals), 16);
memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals));
desc_state->sysvals_ptr = sysvals.gpu;
}
static void
@ -919,16 +864,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf,
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
if (cmdbuf->state.ib.first_vertex != draw->offset_start ||
cmdbuf->state.ib.base_vertex != base_vertex ||
cmdbuf->state.ib.base_vertex != draw->first_instance) {
cmdbuf->state.ib.base_vertex = base_vertex;
cmdbuf->state.ib.base_instance = draw->first_instance;
cmdbuf->state.ib.first_vertex = draw->offset_start;
cmdbuf->state.dirty |= PANVK_DYNAMIC_VERTEX_INSTANCE_OFFSETS;
}
panvk_cmd_prepare_draw_sysvals(cmdbuf, bind_point_state, draw);
panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
@ -1537,7 +1473,15 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer,
struct panfrost_ptr job =
pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);
cmdbuf->state.compute.wg_count = dispatch.wg_count;
struct panvk_sysvals *sysvals = &desc_state->sysvals;
sysvals->num_work_groups.u32[0] = x;
sysvals->num_work_groups.u32[1] = y;
sysvals->num_work_groups.u32[2] = z;
sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x;
sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y;
sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z;
desc_state->sysvals_ptr = 0;
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
dispatch.tsd = batch->tls.gpu;

View file

@ -419,11 +419,8 @@ panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
}
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
if (!pipeline->sysvals[i].ids.sysval_count)
continue;
panvk_per_arch(emit_ubo)(state->sysvals[i],
pipeline->sysvals[i].ids.sysval_count * 16,
panvk_per_arch(emit_ubo)(state->sysvals_ptr,
sizeof(state->sysvals),
&ubos[pipeline->sysvals[i].ubo_idx]);
}

View file

@ -324,10 +324,6 @@ panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
if (pipeline->sysvals[i].ids.sysval_count)
pipeline->num_ubos = MAX2(pipeline->num_ubos, pipeline->sysvals[i].ubo_idx + 1);
}
pipeline->num_sysvals = 0;
for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++)
pipeline->num_sysvals += pipeline->sysvals[i].ids.sysval_count;
}

View file

@ -44,6 +44,39 @@
#include "vk_util.h"
/* Describe the fixed panvk_sysvals layout to the compiler.
 *
 * Each 16-byte slot of struct panvk_sysvals is tagged with the matching
 * PAN_SYSVAL id: the compute pair for compute stages, the four graphics
 * sysvals otherwise, followed in both cases by one PAN_SYSVAL(SSBO, i) entry
 * per SSBO slot. sysval_count is set to the number of 16-byte slots in the
 * struct.
 */
static void
panvk_init_sysvals(struct panfrost_sysvals *sysvals,
                   gl_shader_stage stage)
{
   /* Slot index of a panvk_sysvals member; members must be 16-byte aligned */
#define PANVK_SYSVAL_SLOT(member) \
   (assert(offsetof(struct panvk_sysvals, member) % 16 == 0), \
    offsetof(struct panvk_sysvals, member) / 16)

#define PANVK_SET_SYSVAL(member, ID) \
   sysvals->sysvals[PANVK_SYSVAL_SLOT(member)] = PAN_SYSVAL_##ID

   memset(sysvals, 0, sizeof(*sysvals));
   sysvals->sysval_count = sizeof(struct panvk_sysvals) / 16;

   if (!gl_shader_stage_is_compute(stage)) {
      PANVK_SET_SYSVAL(viewport_scale, VIEWPORT_SCALE);
      PANVK_SET_SYSVAL(viewport_offset, VIEWPORT_OFFSET);
      PANVK_SET_SYSVAL(vertex_instance_offsets, VERTEX_INSTANCE_OFFSETS);
      PANVK_SET_SYSVAL(blend_constants, BLEND_CONSTANTS);
   } else {
      PANVK_SET_SYSVAL(num_work_groups, NUM_WORK_GROUPS);
      PANVK_SET_SYSVAL(local_group_size, LOCAL_GROUP_SIZE);
   }

   /* The SSBO table occupies consecutive slots, one per SSBO */
   unsigned slot = PANVK_SYSVAL_SLOT(ssbos);
   for (unsigned ssbo = 0; ssbo < MAX_SSBOS; ssbo++, slot++)
      sysvals->sysvals[slot] = PAN_SYSVAL(SSBO, ssbo);

#undef PANVK_SET_SYSVAL
#undef PANVK_SYSVAL_SLOT
}
static bool
panvk_inline_blend_constants(nir_builder *b, nir_instr *instr, void *data)
{
@ -341,11 +374,15 @@ panvk_per_arch(shader_create)(struct panvk_device *dev,
};
NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
struct panfrost_sysvals fixed_sysvals;
panvk_init_sysvals(&fixed_sysvals, stage);
struct panfrost_compile_inputs inputs = {
.gpu_id = pdev->gpu_id,
.no_ubo_to_push = true,
.no_idvs = true, /* TODO */
.fixed_sysval_ubo = sysval_ubo,
.fixed_sysval_layout = &fixed_sysvals,
};
NIR_PASS_V(nir, nir_lower_indirect_derefs,
@ -413,6 +450,10 @@ panvk_per_arch(shader_create)(struct panvk_device *dev,
GENX(pan_shader_compile)(nir, &inputs, &shader->binary, &shader->info);
/* System values shouldn't have changed */
assert(memcmp(&shader->info.sysvals, &fixed_sysvals,
sizeof(fixed_sysvals)) == 0);
/* Patch the descriptor count */
shader->info.ubo_count =
shader->info.sysvals.sysval_count ? sysval_ubo + 1 : layout->num_ubos;