mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 22:20:14 +01:00
anv: Drop anv_pipeline::use_primitive_replication
Instead of this fragile use_primitive_replication bit which we set differently depending on whether or not we pulled the shader out of the cache, compute and use the information up-front during the compile and then always fetch it from the vue_map after that. This way, regardless of whether the shader comes from the cache or not, we have the same flow and there are no inconsistencies. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17602>
This commit is contained in:
parent
f1768f5640
commit
ae57628dd5
3 changed files with 34 additions and 47 deletions
|
|
@ -691,7 +691,8 @@ static void
|
||||||
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
||||||
void *mem_ctx,
|
void *mem_ctx,
|
||||||
struct anv_pipeline_stage *stage,
|
struct anv_pipeline_stage *stage,
|
||||||
struct anv_pipeline_layout *layout)
|
struct anv_pipeline_layout *layout,
|
||||||
|
bool use_primitive_replication)
|
||||||
{
|
{
|
||||||
const struct anv_physical_device *pdevice = pipeline->device->physical;
|
const struct anv_physical_device *pdevice = pipeline->device->physical;
|
||||||
const struct brw_compiler *compiler = pdevice->compiler;
|
const struct brw_compiler *compiler = pdevice->compiler;
|
||||||
|
|
@ -714,7 +715,7 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
||||||
struct anv_graphics_pipeline *gfx_pipeline =
|
struct anv_graphics_pipeline *gfx_pipeline =
|
||||||
anv_pipeline_to_graphics(pipeline);
|
anv_pipeline_to_graphics(pipeline);
|
||||||
NIR_PASS(_, nir, anv_nir_lower_multiview, gfx_pipeline->view_mask,
|
NIR_PASS(_, nir, anv_nir_lower_multiview, gfx_pipeline->view_mask,
|
||||||
gfx_pipeline->use_primitive_replication);
|
use_primitive_replication);
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||||
|
|
@ -824,9 +825,13 @@ anv_pipeline_compile_vs(const struct brw_compiler *compiler,
|
||||||
/* When using Primitive Replication for multiview, each view gets its own
|
/* When using Primitive Replication for multiview, each view gets its own
|
||||||
* position slot.
|
* position slot.
|
||||||
*/
|
*/
|
||||||
uint32_t pos_slots = pipeline->use_primitive_replication ?
|
uint32_t pos_slots =
|
||||||
|
(vs_stage->nir->info.per_view_outputs & VARYING_BIT_POS) ?
|
||||||
MAX2(1, util_bitcount(pipeline->view_mask)) : 1;
|
MAX2(1, util_bitcount(pipeline->view_mask)) : 1;
|
||||||
|
|
||||||
|
/* Only position is allowed to be per-view */
|
||||||
|
assert(!(vs_stage->nir->info.per_view_outputs & ~VARYING_BIT_POS));
|
||||||
|
|
||||||
brw_compute_vue_map(compiler->devinfo,
|
brw_compute_vue_map(compiler->devinfo,
|
||||||
&vs_stage->prog_data.vs.base.vue_map,
|
&vs_stage->prog_data.vs.base.vue_map,
|
||||||
vs_stage->nir->info.outputs_written,
|
vs_stage->nir->info.outputs_written,
|
||||||
|
|
@ -1301,29 +1306,6 @@ anv_pipeline_add_executables(struct anv_pipeline *pipeline,
|
||||||
pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries);
|
pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
|
|
||||||
{
|
|
||||||
/* TODO: Cache this pipeline-wide information. */
|
|
||||||
|
|
||||||
if (anv_pipeline_is_primitive(pipeline)) {
|
|
||||||
/* Primitive replication depends on information from all the shaders.
|
|
||||||
* Recover this bit from the fact that we have more than one position slot
|
|
||||||
* in the vertex shader when using it.
|
|
||||||
*/
|
|
||||||
assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
|
|
||||||
int pos_slots = 0;
|
|
||||||
const struct brw_vue_prog_data *vue_prog_data =
|
|
||||||
(const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
|
|
||||||
const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
|
|
||||||
for (int i = 0; i < vue_map->num_slots; i++) {
|
|
||||||
if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
|
|
||||||
pos_slots++;
|
|
||||||
}
|
|
||||||
pipeline->use_primitive_replication = pos_slots > 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
|
anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
|
||||||
const struct vk_graphics_pipeline_state *state,
|
const struct vk_graphics_pipeline_state *state,
|
||||||
|
|
@ -1434,7 +1416,6 @@ anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline
|
||||||
anv_pipeline_add_executables(&pipeline->base, &stages[s],
|
anv_pipeline_add_executables(&pipeline->base, &stages[s],
|
||||||
pipeline->shaders[s]);
|
pipeline->shaders[s]);
|
||||||
}
|
}
|
||||||
anv_pipeline_init_from_cached_graphics(pipeline);
|
|
||||||
return true;
|
return true;
|
||||||
} else if (found > 0) {
|
} else if (found > 0) {
|
||||||
/* We found some but not all of our shaders. This shouldn't happen most
|
/* We found some but not all of our shaders. This shouldn't happen most
|
||||||
|
|
@ -1602,6 +1583,7 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
|
||||||
next_stage = &stages[s];
|
next_stage = &stages[s];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool use_primitive_replication = false;
|
||||||
if (pipeline->base.device->info->ver >= 12 &&
|
if (pipeline->base.device->info->ver >= 12 &&
|
||||||
pipeline->view_mask != 0) {
|
pipeline->view_mask != 0) {
|
||||||
/* For some pipelines HW Primitive Replication can be used instead of
|
/* For some pipelines HW Primitive Replication can be used instead of
|
||||||
|
|
@ -1613,12 +1595,10 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
|
||||||
for (unsigned s = 0; s < ARRAY_SIZE(shaders); s++)
|
for (unsigned s = 0; s < ARRAY_SIZE(shaders); s++)
|
||||||
shaders[s] = stages[s].nir;
|
shaders[s] = stages[s].nir;
|
||||||
|
|
||||||
pipeline->use_primitive_replication =
|
use_primitive_replication =
|
||||||
anv_check_for_primitive_replication(pipeline->base.device,
|
anv_check_for_primitive_replication(pipeline->base.device,
|
||||||
pipeline->active_stages,
|
pipeline->active_stages,
|
||||||
shaders, pipeline->view_mask);
|
shaders, pipeline->view_mask);
|
||||||
} else {
|
|
||||||
pipeline->use_primitive_replication = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct anv_pipeline_stage *prev_stage = NULL;
|
struct anv_pipeline_stage *prev_stage = NULL;
|
||||||
|
|
@ -1631,7 +1611,8 @@ anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
|
||||||
|
|
||||||
void *stage_ctx = ralloc_context(NULL);
|
void *stage_ctx = ralloc_context(NULL);
|
||||||
|
|
||||||
anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
|
anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout,
|
||||||
|
use_primitive_replication);
|
||||||
|
|
||||||
if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
|
if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
|
||||||
prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
|
prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
|
||||||
|
|
@ -1864,7 +1845,8 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
||||||
|
|
||||||
NIR_PASS(_, stage.nir, anv_nir_add_base_work_group_id);
|
NIR_PASS(_, stage.nir, anv_nir_add_base_work_group_id);
|
||||||
|
|
||||||
anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
|
anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout,
|
||||||
|
false /* use_primitive_replication */);
|
||||||
|
|
||||||
unsigned local_size = stage.nir->info.workgroup_size[0] *
|
unsigned local_size = stage.nir->info.workgroup_size[0] *
|
||||||
stage.nir->info.workgroup_size[1] *
|
stage.nir->info.workgroup_size[1] *
|
||||||
|
|
@ -2118,12 +2100,16 @@ anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Our implementation of VK_KHR_multiview uses instancing to draw the
|
/* Our implementation of VK_KHR_multiview uses instancing to draw the
|
||||||
* different views. If the client asks for instancing, we need to multiply
|
* different views when primitive replication cannot be used. If the
|
||||||
* the instance divisor by the number of views ensure that we repeat the
|
* client asks for instancing, we need to multiply by the client's
|
||||||
* client's per-instance data once for each view.
|
* instance count at draw time and instance divisor in the vertex
|
||||||
|
* bindings by the number of views ensure that we repeat the client's
|
||||||
|
* per-instance data once for each view.
|
||||||
*/
|
*/
|
||||||
|
const bool uses_primitive_replication =
|
||||||
|
anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots > 1;
|
||||||
pipeline->instance_multiplier = 1;
|
pipeline->instance_multiplier = 1;
|
||||||
if (pipeline->view_mask && !pipeline->use_primitive_replication)
|
if (pipeline->view_mask && !uses_primitive_replication)
|
||||||
pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
|
pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
|
||||||
} else {
|
} else {
|
||||||
assert(anv_pipeline_is_mesh(pipeline));
|
assert(anv_pipeline_is_mesh(pipeline));
|
||||||
|
|
@ -2540,7 +2526,8 @@ anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
|
||||||
return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
|
return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
}
|
}
|
||||||
|
|
||||||
anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);
|
anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i],
|
||||||
|
layout, false /* use_primitive_replication */);
|
||||||
|
|
||||||
stages[i].feedback.duration += os_time_get_nano() - stage_start;
|
stages[i].feedback.duration += os_time_get_nano() - stage_start;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3123,11 +3123,6 @@ struct anv_graphics_pipeline {
|
||||||
bool force_fragment_thread_dispatch;
|
bool force_fragment_thread_dispatch;
|
||||||
bool negative_one_to_one;
|
bool negative_one_to_one;
|
||||||
|
|
||||||
/* When primitive replication is used, subpass->view_mask will describe what
|
|
||||||
* views to replicate.
|
|
||||||
*/
|
|
||||||
bool use_primitive_replication;
|
|
||||||
|
|
||||||
uint32_t vb_used;
|
uint32_t vb_used;
|
||||||
struct anv_pipeline_vertex_binding {
|
struct anv_pipeline_vertex_binding {
|
||||||
uint32_t stride;
|
uint32_t stride;
|
||||||
|
|
|
||||||
|
|
@ -2142,17 +2142,22 @@ static void
|
||||||
emit_3dstate_primitive_replication(struct anv_graphics_pipeline *pipeline,
|
emit_3dstate_primitive_replication(struct anv_graphics_pipeline *pipeline,
|
||||||
const struct vk_render_pass_state *rp)
|
const struct vk_render_pass_state *rp)
|
||||||
{
|
{
|
||||||
if (!pipeline->use_primitive_replication) {
|
const int replication_count =
|
||||||
|
anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots;
|
||||||
|
|
||||||
|
assert(replication_count >= 1);
|
||||||
|
if (replication_count == 1) {
|
||||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int view_count = util_bitcount(rp->view_mask);
|
uint32_t view_mask = rp->view_mask;
|
||||||
assert(view_count > 1 && view_count <= MAX_VIEWS_FOR_PRIMITIVE_REPLICATION);
|
assert(replication_count == util_bitcount(view_mask));
|
||||||
|
assert(replication_count <= MAX_VIEWS_FOR_PRIMITIVE_REPLICATION);
|
||||||
|
|
||||||
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) {
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) {
|
||||||
pr.ReplicaMask = (1 << view_count) - 1;
|
pr.ReplicaMask = (1 << replication_count) - 1;
|
||||||
pr.ReplicationCount = view_count - 1;
|
pr.ReplicationCount = replication_count - 1;
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
u_foreach_bit(view_index, rp->view_mask) {
|
u_foreach_bit(view_index, rp->view_mask) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue