anv/cmd_buffer: Add support for pushing UBO ranges

In order to do this we have to modify push constant set up to handle
ranges.  We also have to tweak the way we handle dirty bits a bit so
that we re-push whenever a descriptor set changes.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
Jason Ekstrand 2017-12-01 14:28:46 -08:00
parent 0c879b62b0
commit 2f9eb045f3
2 changed files with 112 additions and 33 deletions

View file

@ -1843,9 +1843,12 @@ cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
}
}
static uint32_t
cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
static void
cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
VkShaderStageFlags dirty_stages)
{
UNUSED const struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
static const uint32_t push_constant_opcodes[] = {
[MESA_SHADER_VERTEX] = 21,
[MESA_SHADER_TESS_CTRL] = 25, /* HS */
@ -1857,39 +1860,117 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
VkShaderStageFlags flushed = 0;
anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) {
if (stage == MESA_SHADER_COMPUTE)
continue;
anv_foreach_stage(stage, dirty_stages) {
assert(stage < ARRAY_SIZE(push_constant_opcodes));
assert(push_constant_opcodes[stage] > 0);
struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c) {
c._3DCommandSubOpcode = push_constant_opcodes[stage];
if (state.offset == 0) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c)
c._3DCommandSubOpcode = push_constant_opcodes[stage];
} else {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c) {
c._3DCommandSubOpcode = push_constant_opcodes[stage],
c.ConstantBody = (struct GENX(3DSTATE_CONSTANT_BODY)) {
#if GEN_GEN >= 9
.Buffer[2] = { &cmd_buffer->device->dynamic_state_pool.block_pool.bo, state.offset },
.ReadLength[2] = DIV_ROUND_UP(state.alloc_size, 32),
if (anv_pipeline_has_stage(cmd_buffer->state.pipeline, stage)) {
#if GEN_GEN >= 8 || GEN_IS_HASWELL
const struct brw_stage_prog_data *prog_data =
pipeline->shaders[stage]->prog_data;
const struct anv_pipeline_bind_map *bind_map =
&pipeline->shaders[stage]->bind_map;
/* The Skylake PRM contains the following restriction:
*
* "The driver must ensure The following case does not occur
* without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
* buffer 3 read length equal to zero committed followed by a
* 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
* zero committed."
*
* To avoid this, we program the buffers in the highest slots.
* This way, slot 0 is only used if slot 3 is also used.
*/
int n = 3;
for (int i = 3; i >= 0; i--) {
const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
if (range->length == 0)
continue;
const unsigned surface =
prog_data->binding_table.ubo_start + range->block;
assert(surface <= bind_map->surface_count);
const struct anv_pipeline_binding *binding =
&bind_map->surface_to_descriptor[surface];
const struct anv_descriptor *desc =
anv_descriptor_for_binding(cmd_buffer, binding);
struct anv_address read_addr;
uint32_t read_len;
if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
read_len = MIN2(range->length,
DIV_ROUND_UP(desc->buffer_view->range, 32) - range->start);
read_addr = (struct anv_address) {
.bo = desc->buffer_view->bo,
.offset = desc->buffer_view->offset +
range->start * 32,
};
} else {
assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
uint32_t dynamic_offset =
dynamic_offset_for_binding(cmd_buffer, pipeline, binding);
uint32_t buf_offset =
MIN2(desc->offset + dynamic_offset, desc->buffer->size);
uint32_t buf_range =
MIN2(desc->range, desc->buffer->size - buf_offset);
read_len = MIN2(range->length,
DIV_ROUND_UP(buf_range, 32) - range->start);
read_addr = (struct anv_address) {
.bo = desc->buffer->bo,
.offset = desc->buffer->offset + buf_offset +
range->start * 32,
};
}
if (read_len > 0) {
c.ConstantBody.Buffer[n] = read_addr;
c.ConstantBody.ReadLength[n] = read_len;
n--;
}
}
struct anv_state state =
anv_cmd_buffer_push_constants(cmd_buffer, stage);
if (state.alloc_size > 0) {
c.ConstantBody.Buffer[n] = (struct anv_address) {
.bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
.offset = state.offset,
};
c.ConstantBody.ReadLength[n] =
DIV_ROUND_UP(state.alloc_size, 32);
}
#else
.Buffer[0] = { .offset = state.offset },
.ReadLength[0] = DIV_ROUND_UP(state.alloc_size, 32),
/* For Ivy Bridge, the push constants packets have a different
* rule that would require us to iterate in the other direction
* and possibly mess around with dynamic state base address.
* Don't bother; just emit regular push constants at n = 0.
*/
struct anv_state state =
anv_cmd_buffer_push_constants(cmd_buffer, stage);
if (state.alloc_size > 0) {
c.ConstantBody.Buffer[0].offset = state.offset,
c.ConstantBody.ReadLength[0] =
DIV_ROUND_UP(state.alloc_size, 32);
}
#endif
};
}
}
flushed |= mesa_to_vk_shader_stage(stage);
}
cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_ALL_GRAPHICS;
return flushed;
cmd_buffer->state.push_constants_dirty &= ~flushed;
}
void
@ -2003,16 +2084,13 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
if (cmd_buffer->state.descriptors_dirty)
dirty = flush_descriptor_sets(cmd_buffer);
if (cmd_buffer->state.push_constants_dirty) {
#if GEN_GEN >= 9
/* On Sky Lake and later, the binding table pointers commands are
* what actually flush the changes to push constant state so we need
* to dirty them so they get re-emitted below.
if (dirty || cmd_buffer->state.push_constants_dirty) {
/* Because we're pushing UBOs, we have to push whenever either
* descriptors or push constants is dirty.
*/
dirty |= cmd_buffer_flush_push_constants(cmd_buffer);
#else
cmd_buffer_flush_push_constants(cmd_buffer);
#endif
dirty |= cmd_buffer->state.push_constants_dirty;
dirty &= ANV_STAGE_MASK & VK_SHADER_STAGE_ALL_GRAPHICS;
cmd_buffer_flush_push_constants(cmd_buffer, dirty);
}
if (dirty)

View file

@ -1480,7 +1480,8 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
ps.VectorMaskEnable = true;
ps.SamplerCount = get_sampler_count(fs_bin);
ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin);
ps.PushConstantEnable = wm_prog_data->base.nr_params > 0;
ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
wm_prog_data->base.ubo_ranges[0].length;
ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
POSOFFSET_SAMPLE: POSOFFSET_NONE;
#if GEN_GEN < 8