intel: prepare VUE layout for more than 2 layouts

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34109>
Lionel Landwerlin 2025-04-29 17:40:22 +03:00 committed by Marge Bot
parent 95efdca00b
commit 2d396f6085
19 changed files with 196 additions and 93 deletions
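Across the hunks below the recurring change is the same: the boolean separate_shader argument of brw_compute_vue_map()/elk_compute_vue_map() is replaced by the new enum intel_vue_layout. A minimal before/after sketch of one call site, mirroring the crocus/elk hunks (identifiers as they appear in the diff):

    /* before: a single boolean distinguishing SSO from linked pipelines */
    elk_compute_vue_map(devinfo, &vue_prog_data->vue_map, outputs_written,
                        nir->info.separate_shader, /* pos slots */ 1);

    /* after: an explicit layout enum, leaving room for more than two layouts */
    elk_compute_vue_map(devinfo, &vue_prog_data->vue_map, outputs_written,
                        nir->info.separate_shader ? INTEL_VUE_LAYOUT_SEPARATE
                                                  : INTEL_VUE_LAYOUT_FIXED,
                        /* pos slots */ 1);

The brw (Gfx9+) and anv paths instead carry the enum in the program keys (base.vue_layout / vue.layout) and pass that through.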

View file

@ -1204,7 +1204,9 @@ crocus_compile_vs(struct crocus_context *ice,
crocus_vs_outputs_written(ice, key, nir->info.outputs_written);
elk_compute_vue_map(devinfo,
&vue_prog_data->vue_map, outputs_written,
nir->info.separate_shader, /* pos slots */ 1);
nir->info.separate_shader ?
INTEL_VUE_LAYOUT_SEPARATE :
INTEL_VUE_LAYOUT_FIXED, /* pos slots */ 1);
/* Don't tell the backend about our clip plane constants, we've already
* lowered them in NIR and we don't want it doing it again.
@ -1694,7 +1696,9 @@ crocus_compile_gs(struct crocus_context *ice,
elk_compute_vue_map(devinfo,
&vue_prog_data->vue_map, nir->info.outputs_written,
nir->info.separate_shader, /* pos slots */ 1);
nir->info.separate_shader ?
INTEL_VUE_LAYOUT_SEPARATE :
INTEL_VUE_LAYOUT_FIXED, /* pos slots */ 1);
if (devinfo->ver == 6)
gfx6_gs_xfb_setup(&ish->stream_output, gs_prog_data);
@ -1969,7 +1973,7 @@ update_last_vue_map(struct crocus_context *ice,
ice->state.stage_dirty_for_nos[CROCUS_NOS_LAST_VUE_MAP];
}
if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
if (changed_slots || (old_map && old_map->layout != vue_map->layout)) {
ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;
if (devinfo->ver < 6)
ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
@ -2872,7 +2876,7 @@ crocus_create_fs_state(struct pipe_context *ctx,
if (devinfo->ver < 6) {
elk_compute_vue_map(devinfo, &vue_map,
info->inputs_read | VARYING_BIT_POS,
false, /* pos slots */ 1);
INTEL_VUE_LAYOUT_FIXED, /* pos slots */ 1);
}
if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key)))
crocus_compile_fs(ice, ish, &key, &vue_map);

View file

@ -231,7 +231,8 @@ struct iris_vue_prog_key {
struct iris_base_prog_key base;
unsigned nr_userclip_plane_consts:4;
unsigned padding:28;
enum intel_vue_layout layout:2;
unsigned padding:26;
};
struct iris_vs_prog_key {
@ -284,7 +285,8 @@ struct iris_fs_prog_key {
bool multisample_fbo:1;
bool force_dual_color_blend:1;
bool coherent_fb_fetch:1;
uint64_t padding:43;
enum intel_vue_layout vue_layout:2;
uint64_t padding:41;
};
struct iris_cs_prog_key {
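The 2-bit layout field is carved out of the existing padding, so both iris key structs keep their width; the bit accounting for the two hunks above:

    iris_vue_prog_key: 4 (nr_userclip_plane_consts) + 2 (layout) + 26 (padding) = 32 bits,
                       the same as the previous 4 + 28
    iris_fs_prog_key:  the uint64_t padding shrinks from 43 to 41 bits to absorb the
                       2-bit vue_layout field, again leaving the total width unchanged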

View file

@ -55,12 +55,20 @@
#include "iris_pipe.h"
#include "nir/tgsi_to_nir.h"
#define KEY_INIT(prefix) \
.prefix.program_string_id = ish->program_id, \
.prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
#define BRW_KEY_INIT(gen, prog_id, limit_trig_input) \
static inline enum intel_vue_layout
vue_layout(bool separate_shader)
{
return separate_shader ? INTEL_VUE_LAYOUT_SEPARATE : INTEL_VUE_LAYOUT_FIXED;
}
#define KEY_INIT(prefix) \
.prefix.program_string_id = ish->program_id, \
.prefix.limit_trig_input_range = \
screen->driconf.limit_trig_input_range
#define BRW_KEY_INIT(gen, prog_id, limit_trig_input, _vue_layout) \
.base.program_string_id = prog_id, \
.base.limit_trig_input_range = limit_trig_input
.base.limit_trig_input_range = limit_trig_input, \
.base.vue_layout = _vue_layout
#ifdef INTEL_USE_ELK
#define ELK_KEY_INIT(gen, prog_id, limit_trig_input) \
@ -525,7 +533,8 @@ iris_to_brw_vs_key(const struct iris_screen *screen,
{
return (struct brw_vs_prog_key) {
BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
key->vue.base.limit_trig_input_range),
key->vue.base.limit_trig_input_range,
key->vue.layout),
};
}
@ -535,7 +544,8 @@ iris_to_brw_tcs_key(const struct iris_screen *screen,
{
return (struct brw_tcs_prog_key) {
BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
key->vue.base.limit_trig_input_range),
key->vue.base.limit_trig_input_range,
key->vue.layout),
._tes_primitive_mode = key->_tes_primitive_mode,
.input_vertices = key->input_vertices,
.patch_outputs_written = key->patch_outputs_written,
@ -549,7 +559,8 @@ iris_to_brw_tes_key(const struct iris_screen *screen,
{
return (struct brw_tes_prog_key) {
BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
key->vue.base.limit_trig_input_range),
key->vue.base.limit_trig_input_range,
key->vue.layout),
.patch_inputs_read = key->patch_inputs_read,
.inputs_read = key->inputs_read,
};
@ -561,7 +572,8 @@ iris_to_brw_gs_key(const struct iris_screen *screen,
{
return (struct brw_gs_prog_key) {
BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id,
key->vue.base.limit_trig_input_range),
key->vue.base.limit_trig_input_range,
key->vue.layout),
};
}
@ -571,7 +583,8 @@ iris_to_brw_fs_key(const struct iris_screen *screen,
{
return (struct brw_wm_prog_key) {
BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
key->base.limit_trig_input_range),
key->base.limit_trig_input_range,
key->vue_layout),
.nr_color_regions = key->nr_color_regions,
.flat_shade = key->flat_shade,
.alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
@ -595,7 +608,8 @@ iris_to_brw_cs_key(const struct iris_screen *screen,
{
return (struct brw_cs_prog_key) {
BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id,
key->base.limit_trig_input_range),
key->base.limit_trig_input_range,
INTEL_VUE_LAYOUT_SEPARATE),
};
}
@ -1884,7 +1898,7 @@ iris_compile_vs(struct iris_screen *screen,
brw_compute_vue_map(devinfo,
&brw_prog_data->base.vue_map, nir->info.outputs_written,
nir->info.separate_shader, /* pos_slots */ 1);
key->vue.layout, /* pos_slots */ 1);
struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
@ -1916,7 +1930,9 @@ iris_compile_vs(struct iris_screen *screen,
elk_compute_vue_map(devinfo,
&elk_prog_data->base.vue_map, nir->info.outputs_written,
nir->info.separate_shader, /* pos_slots */ 1);
nir->info.separate_shader ?
INTEL_VUE_LAYOUT_SEPARATE :
INTEL_VUE_LAYOUT_FIXED, /* pos_slots */ 1);
struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key);
@ -1983,7 +1999,10 @@ iris_update_compiled_vs(struct iris_context *ice)
struct iris_uncompiled_shader *ish =
ice->shaders.uncompiled[MESA_SHADER_VERTEX];
struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
struct iris_vs_prog_key key = {
KEY_INIT(vue.base),
.vue.layout = vue_layout(ish->nir->info.separate_shader),
};
screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
@ -2208,6 +2227,7 @@ iris_update_compiled_tcs(struct iris_context *ice)
iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
struct iris_tcs_prog_key key = {
.vue.base.program_string_id = tcs ? tcs->program_id : 0,
.vue.layout = vue_layout(tcs ? tcs->nir->info.separate_shader : false),
._tes_primitive_mode = tes_info->tess._primitive_mode,
.input_vertices =
!tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0,
@ -2416,7 +2436,10 @@ iris_update_compiled_tes(struct iris_context *ice)
struct iris_uncompiled_shader *ish =
ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
struct iris_tes_prog_key key = {
KEY_INIT(vue.base),
.vue.layout = vue_layout(ish->nir->info.separate_shader),
};
get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
@ -2500,7 +2523,7 @@ iris_compile_gs(struct iris_screen *screen,
brw_compute_vue_map(devinfo,
&brw_prog_data->base.vue_map, nir->info.outputs_written,
nir->info.separate_shader, /* pos_slots */ 1);
key->vue.layout, /* pos_slots */ 1);
struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
@ -2530,7 +2553,9 @@ iris_compile_gs(struct iris_screen *screen,
elk_compute_vue_map(devinfo,
&elk_prog_data->base.vue_map, nir->info.outputs_written,
nir->info.separate_shader, /* pos_slots */ 1);
nir->info.separate_shader ?
INTEL_VUE_LAYOUT_SEPARATE :
INTEL_VUE_LAYOUT_FIXED, /* pos_slots */ 1);
struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key);
@ -2600,7 +2625,10 @@ iris_update_compiled_gs(struct iris_context *ice)
struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
if (ish) {
struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
struct iris_gs_prog_key key = {
KEY_INIT(vue.base),
.vue.layout = vue_layout(ish->nir->info.separate_shader),
};
screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
bool added;
@ -2777,7 +2805,10 @@ iris_update_compiled_fs(struct iris_context *ice)
struct iris_uncompiled_shader *ish =
ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
struct iris_fs_prog_key key = { KEY_INIT(base) };
struct iris_fs_prog_key key = {
KEY_INIT(base),
.vue_layout = vue_layout(ish->nir->info.separate_shader),
};
screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
struct intel_vue_map *last_vue_map =
@ -2847,7 +2878,7 @@ update_last_vue_map(struct iris_context *ice,
ice->state.dirty |= IRIS_DIRTY_CLIP;
}
if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
if (changed_slots || (old_map && old_map->layout != vue_map->layout)) {
ice->state.dirty |= IRIS_DIRTY_SBE;
}
@ -3429,13 +3460,17 @@ iris_create_shader_state(struct pipe_context *ctx,
if (info->clip_distance_array_size == 0)
ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
key.vs = (struct iris_vs_prog_key) {
KEY_INIT(vue.base),
.vue.layout = vue_layout(ish->nir->info.separate_shader),
};
key_size = sizeof(key.vs);
break;
case MESA_SHADER_TESS_CTRL: {
key.tcs = (struct iris_tcs_prog_key) {
KEY_INIT(vue.base),
.vue.layout = vue_layout(ish->nir->info.separate_shader),
// XXX: make sure the linker fills this out from the TES...
._tes_primitive_mode =
info->tess._primitive_mode ? info->tess._primitive_mode
@ -3463,6 +3498,7 @@ iris_create_shader_state(struct pipe_context *ctx,
key.tes = (struct iris_tes_prog_key) {
KEY_INIT(vue.base),
.vue.layout = vue_layout(ish->nir->info.separate_shader),
// XXX: not ideal, need TCS output/TES input unification
.inputs_read = info->inputs_read,
.patch_inputs_read = info->patch_inputs_read,
@ -3474,7 +3510,10 @@ iris_create_shader_state(struct pipe_context *ctx,
case MESA_SHADER_GEOMETRY:
ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
key.gs = (struct iris_gs_prog_key) {
KEY_INIT(vue.base),
.vue.layout = vue_layout(ish->nir->info.separate_shader),
};
key_size = sizeof(key.gs);
break;
@ -3505,6 +3544,7 @@ iris_create_shader_state(struct pipe_context *ctx,
key.fs = (struct iris_fs_prog_key) {
KEY_INIT(base),
.vue_layout = vue_layout(ish->nir->info.separate_shader),
.nr_color_regions = util_bitcount(color_outputs),
.coherent_fb_fetch = devinfo->ver >= 9 && devinfo->ver < 20,
.input_slots_valid =

View file

@ -91,7 +91,9 @@ blorp_compile_vs_elk(struct blorp_context *blorp, void *mem_ctx,
elk_compute_vue_map(compiler->devinfo,
&vs_prog_data->base.vue_map,
nir->info.outputs_written,
nir->info.separate_shader,
nir->info.separate_shader ?
INTEL_VUE_LAYOUT_SEPARATE :
INTEL_VUE_LAYOUT_FIXED,
1);
struct elk_vs_prog_key vs_key = { 0, };
@ -231,7 +233,8 @@ blorp_ensure_sf_program_elk(struct blorp_batch *batch,
unsigned program_size;
struct intel_vue_map vue_map;
elk_compute_vue_map(compiler->devinfo, &vue_map, slots_valid, false, 1);
elk_compute_vue_map(compiler->devinfo, &vue_map, slots_valid,
INTEL_VUE_LAYOUT_FIXED, 1);
struct elk_sf_prog_data prog_data_tmp;
program = elk_compile_sf(compiler, mem_ctx, &key.key,

View file

@ -825,7 +825,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
struct intel_vue_map prev_stage_vue_map;
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
nir->info.separate_shader, 1);
key->base.vue_layout, 1);
int first_slot =
brw_compute_first_fs_urb_slot_required(unique_fs_attrs,

View file

@ -161,7 +161,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
GLbitfield64 inputs_read = nir->info.inputs_read;
brw_compute_vue_map(compiler->devinfo,
&input_vue_map, inputs_read,
nir->info.separate_shader, 1);
key->base.vue_layout, 1);
brw_nir_apply_key(nir, compiler, &key->base, dispatch_width);
brw_nir_lower_vue_inputs(nir, &input_vue_map);

View file

@ -1662,12 +1662,9 @@ brw_compile_mesh(const struct brw_compiler *compiler,
brw_nir_lower_tue_inputs(nir, params->tue_map);
/* Incorrectly set separate to false until we fix the anv/brw in the next
* commit.
*/
brw_compute_mue_map(compiler, nir, &prog_data->map,
prog_data->index_format,
false /* TODO: use nir->info.separate_shader */);
key->base.vue_layout);
brw_nir_lower_mue_outputs(nir, &prog_data->map);
prog_data->autostrip_enable = brw_mesh_autostrip_enable(compiler, nir, &prog_data->map);

View file

@ -200,7 +200,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
struct intel_vue_map input_vue_map;
brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
nir->info.separate_shader, 1);
key->base.vue_layout, 1);
brw_compute_tess_vue_map(&vue_prog_data->vue_map,
nir->info.outputs_written,
nir->info.patch_outputs_written);

View file

@ -84,7 +84,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
nir->info.outputs_written,
nir->info.separate_shader, 1);
key->base.vue_layout, 1);
unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;

View file

@ -210,14 +210,16 @@ struct brw_base_prog_key {
bool uses_inline_push_addr:1;
unsigned padding:21;
enum intel_vue_layout vue_layout:2;
/**
* Apply workarounds for SIN and COS input range problems.
* This limits input range for SIN and COS to [-2p : 2p] to
* avoid precision issues.
*/
bool limit_trig_input_range;
bool limit_trig_input_range:1;
unsigned padding:26;
};
/**
@ -1070,7 +1072,7 @@ brw_varying_to_offset(const struct intel_vue_map *vue_map, unsigned varying)
void brw_compute_vue_map(const struct intel_device_info *devinfo,
struct intel_vue_map *vue_map,
uint64_t slots_valid,
bool separate_shader,
enum intel_vue_layout layout,
uint32_t pos_slots);
void brw_compute_tess_vue_map(struct intel_vue_map *const vue_map,

View file

@ -60,10 +60,10 @@ void
brw_compute_vue_map(const struct intel_device_info *devinfo,
struct intel_vue_map *vue_map,
uint64_t slots_valid,
bool separate,
enum intel_vue_layout layout,
uint32_t pos_slots)
{
if (separate) {
if (layout != INTEL_VUE_LAYOUT_FIXED) {
/* In SSO mode, we don't know whether the adjacent stage will
* read/write gl_ClipDistance, which has a fixed slot location.
* We have to assume the worst and reserve a slot for it, or else
@ -77,7 +77,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
}
vue_map->slots_valid = slots_valid;
vue_map->separate = separate;
vue_map->layout = layout;
/* gl_Layer, gl_ViewportIndex & gl_PrimitiveShadingRateEXT don't get their
* own varying slots -- they are stored in the first VUE slot
@ -177,7 +177,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo,
uint64_t generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
while (generics != 0) {
const int varying = ffsll(generics) - 1;
if (separate) {
if (layout != INTEL_VUE_LAYOUT_FIXED) {
slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
}
assign_vue_slot(vue_map, varying, slot++);
@ -202,8 +202,10 @@ brw_compute_tess_vue_map(struct intel_vue_map *vue_map,
/* I don't think anything actually uses this... */
vue_map->slots_valid = vertex_slots;
/* separate isn't really meaningful, but make sure it's initialized */
vue_map->separate = false;
/* separate isn't really meaningful; we always compile tessellation
* shaders together, so use a fixed layout.
*/
vue_map->layout = INTEL_VUE_LAYOUT_FIXED;
vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
VARYING_BIT_TESS_LEVEL_INNER);
@ -278,12 +280,17 @@ void
brw_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map,
gl_shader_stage stage)
{
const char *layout_name =
vue_map->layout == INTEL_VUE_LAYOUT_FIXED ? "fixed" :
vue_map->layout == INTEL_VUE_LAYOUT_SEPARATE ? "separate" :
"separate-mesh";
if (vue_map->num_per_vertex_slots > 0 || vue_map->num_per_patch_slots > 0) {
fprintf(fp, "PUE map (%d slots, %d/patch, %d/vertex, %s)\n",
vue_map->num_slots,
vue_map->num_per_patch_slots,
vue_map->num_per_vertex_slots,
vue_map->separate ? "SSO" : "non-SSO");
layout_name);
for (int i = 0; i < vue_map->num_slots; i++) {
if (vue_map->slot_to_varying[i] >= VARYING_SLOT_PATCH0) {
fprintf(fp, " [%02d] VARYING_SLOT_PATCH%d\n", i,
@ -295,8 +302,7 @@ brw_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map,
}
} else {
fprintf(fp, "%s VUE map (%d slots, %s)\n",
gl_shader_stage_name(stage),
vue_map->num_slots, vue_map->separate ? "SSO" : "non-SSO");
gl_shader_stage_name(stage), vue_map->num_slots, layout_name);
for (int i = 0; i < vue_map->num_slots; i++) {
fprintf(fp, " [%02d] %s\n", i,
varying_name(vue_map->slot_to_varying[i], stage));
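The generic-varying loop above is where the two layouts actually diverge: with INTEL_VUE_LAYOUT_FIXED slots are handed out contiguously, while the non-fixed layouts use the location-based formula so a producer and consumer compiled separately still agree. A small worked example (hypothetical shader writing only VAR0 and VAR3):

    INTEL_VUE_LAYOUT_FIXED:    VAR0 and VAR3 take the next two consecutive slots
                               after the built-ins (packed, no holes)
    INTEL_VUE_LAYOUT_SEPARATE: VAR0 -> first_generic_slot + 0,
                               VAR3 -> first_generic_slot + 3
                               (slots for the unwritten VAR1/VAR2 are left as holes)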

View file

@ -1215,7 +1215,7 @@ elk_varying_to_offset(const struct intel_vue_map *vue_map, unsigned varying)
void elk_compute_vue_map(const struct intel_device_info *devinfo,
struct intel_vue_map *vue_map,
uint64_t slots_valid,
bool separate_shader,
enum intel_vue_layout layout,
uint32_t pos_slots);
void elk_compute_tess_vue_map(struct intel_vue_map *const vue_map,

View file

@ -1424,7 +1424,9 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
struct intel_vue_map prev_stage_vue_map;
elk_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
nir->info.separate_shader, 1);
nir->info.separate_shader ?
INTEL_VUE_LAYOUT_SEPARATE :
INTEL_VUE_LAYOUT_FIXED, 1);
int first_slot =
elk_compute_first_urb_slot_required(inputs_read,

View file

@ -1276,7 +1276,9 @@ elk_compile_tes(const struct elk_compiler *compiler,
elk_compute_vue_map(devinfo, &prog_data->base.vue_map,
nir->info.outputs_written,
nir->info.separate_shader, 1);
nir->info.separate_shader ?
INTEL_VUE_LAYOUT_SEPARATE :
INTEL_VUE_LAYOUT_FIXED, 1);
unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;

View file

@ -610,7 +610,9 @@ elk_compile_gs(const struct elk_compiler *compiler,
GLbitfield64 inputs_read = nir->info.inputs_read;
elk_compute_vue_map(compiler->devinfo,
&c.input_vue_map, inputs_read,
nir->info.separate_shader, 1);
nir->info.separate_shader ?
INTEL_VUE_LAYOUT_SEPARATE :
INTEL_VUE_LAYOUT_FIXED, 1);
elk_nir_apply_key(nir, compiler, &key->base, 8);
elk_nir_lower_vue_inputs(nir, &c.input_vue_map);

View file

@ -373,7 +373,9 @@ elk_compile_tcs(const struct elk_compiler *compiler,
struct intel_vue_map input_vue_map;
elk_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
nir->info.separate_shader, 1);
nir->info.separate_shader ?
INTEL_VUE_LAYOUT_SEPARATE :
INTEL_VUE_LAYOUT_FIXED, 1);
elk_compute_tess_vue_map(&vue_prog_data->vue_map,
nir->info.outputs_written,
nir->info.patch_outputs_written);

View file

@ -60,17 +60,20 @@ void
elk_compute_vue_map(const struct intel_device_info *devinfo,
struct intel_vue_map *vue_map,
uint64_t slots_valid,
bool separate,
enum intel_vue_layout layout,
uint32_t pos_slots)
{
assert(layout == INTEL_VUE_LAYOUT_FIXED ||
layout == INTEL_VUE_LAYOUT_SEPARATE);
/* Keep using the packed/contiguous layout on old hardware - we only need
* the SSO layout when using geometry/tessellation shaders or 32 FS input
* varyings, which only exist on Gen >= 6. It's also a bit more efficient.
*/
if (devinfo->ver < 6)
separate = false;
layout = INTEL_VUE_LAYOUT_FIXED;
if (separate) {
if (layout == INTEL_VUE_LAYOUT_SEPARATE) {
/* In SSO mode, we don't know whether the adjacent stage will
* read/write gl_ClipDistance, which has a fixed slot location.
* We have to assume the worst and reserve a slot for it, or else
@ -84,7 +87,7 @@ elk_compute_vue_map(const struct intel_device_info *devinfo,
}
vue_map->slots_valid = slots_valid;
vue_map->separate = separate;
vue_map->layout = layout;
/* gl_Layer, gl_ViewportIndex & gl_PrimitiveShadingRateEXT don't get their
* own varying slots -- they are stored in the first VUE slot
@ -198,7 +201,7 @@ elk_compute_vue_map(const struct intel_device_info *devinfo,
uint64_t generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0);
while (generics != 0) {
const int varying = ffsll(generics) - 1;
if (separate) {
if (layout == INTEL_VUE_LAYOUT_SEPARATE) {
slot = first_generic_slot + varying - VARYING_SLOT_VAR0;
}
assign_vue_slot(vue_map, varying, slot++);
@ -224,7 +227,7 @@ elk_compute_tess_vue_map(struct intel_vue_map *vue_map,
vue_map->slots_valid = vertex_slots;
/* separate isn't really meaningful, but make sure it's initialized */
vue_map->separate = false;
vue_map->layout = INTEL_VUE_LAYOUT_FIXED;
vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER |
VARYING_BIT_TESS_LEVEL_INNER);
@ -301,12 +304,15 @@ void
elk_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map,
gl_shader_stage stage)
{
const char *layout_name =
vue_map->layout == INTEL_VUE_LAYOUT_FIXED ? "non-SSO" : "SSO";
if (vue_map->num_per_vertex_slots > 0 || vue_map->num_per_patch_slots > 0) {
fprintf(fp, "PUE map (%d slots, %d/patch, %d/vertex, %s)\n",
vue_map->num_slots,
vue_map->num_per_patch_slots,
vue_map->num_per_vertex_slots,
vue_map->separate ? "SSO" : "non-SSO");
layout_name);
for (int i = 0; i < vue_map->num_slots; i++) {
if (vue_map->slot_to_varying[i] >= VARYING_SLOT_PATCH0) {
fprintf(fp, " [%d] VARYING_SLOT_PATCH%d\n", i,
@ -317,8 +323,7 @@ elk_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map,
}
}
} else {
fprintf(fp, "VUE map (%d slots, %s)\n",
vue_map->num_slots, vue_map->separate ? "SSO" : "non-SSO");
fprintf(fp, "VUE map (%d slots, %s)\n", vue_map->num_slots, layout_name);
for (int i = 0; i < vue_map->num_slots; i++) {
fprintf(fp, " [%d] %s\n", i,
varying_name(vue_map->slot_to_varying[i], stage));

View file

@ -122,6 +122,19 @@ enum intel_barycentric_mode {
(1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \
(1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))
enum intel_vue_layout {
/**
* Layout is fixed and shared by producer/consumer, allowing for tight
* packing.
*/
INTEL_VUE_LAYOUT_FIXED = 0,
/**
* Layout is separate; this works for ARB_separate_shader_objects but
* without Mesh support.
*/
INTEL_VUE_LAYOUT_SEPARATE,
};
/**
* Data structure recording the relationship between the gl_varying_slot enum
* and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
@ -142,7 +155,7 @@ struct intel_vue_map {
uint64_t slots_valid;
/**
* Is this VUE map for a separate shader pipeline?
* The layout of the VUE
*
* Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
* without the linker having a chance to dead code eliminate unused varyings.
@ -150,7 +163,7 @@ struct intel_vue_map {
* This means that we have to use a fixed slot layout, based on the output's
* location field, rather than assigning slots in a compact contiguous block.
*/
bool separate;
enum intel_vue_layout layout;
/**
* Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are

View file

@ -316,20 +316,23 @@ anv_get_robust_flags(const struct vk_pipeline_robustness_state *rstate)
static void
populate_base_prog_key(struct anv_pipeline_stage *stage,
const struct anv_device *device)
const struct anv_device *device,
const enum intel_vue_layout vue_layout)
{
stage->key.base.robust_flags = anv_get_robust_flags(&stage->rstate);
stage->key.base.vue_layout = vue_layout;
stage->key.base.limit_trig_input_range =
device->physical->instance->limit_trig_input_range;
}
static void
populate_vs_prog_key(struct anv_pipeline_stage *stage,
const struct anv_device *device)
const struct anv_device *device,
const enum intel_vue_layout vue_layout)
{
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
populate_base_prog_key(stage, device, vue_layout);
stage->key.vs.vf_component_packing =
device->physical->instance->vf_component_packing;
@ -338,31 +341,34 @@ populate_vs_prog_key(struct anv_pipeline_stage *stage,
static void
populate_tcs_prog_key(struct anv_pipeline_stage *stage,
const struct anv_device *device,
unsigned input_vertices)
unsigned input_vertices,
const enum intel_vue_layout vue_layout)
{
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
populate_base_prog_key(stage, device, vue_layout);
stage->key.tcs.input_vertices = input_vertices;
}
static void
populate_tes_prog_key(struct anv_pipeline_stage *stage,
const struct anv_device *device)
const struct anv_device *device,
const enum intel_vue_layout vue_layout)
{
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
populate_base_prog_key(stage, device, vue_layout);
}
static void
populate_gs_prog_key(struct anv_pipeline_stage *stage,
const struct anv_device *device)
const struct anv_device *device,
const enum intel_vue_layout vue_layout)
{
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
populate_base_prog_key(stage, device, vue_layout);
}
static bool
@ -424,18 +430,19 @@ populate_task_prog_key(struct anv_pipeline_stage *stage,
{
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
populate_base_prog_key(stage, device, INTEL_VUE_LAYOUT_FIXED);
stage->key.base.uses_inline_push_addr = true;
}
static void
populate_mesh_prog_key(struct anv_pipeline_stage *stage,
const struct anv_device *device)
const struct anv_device *device,
const enum intel_vue_layout vue_layout)
{
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
populate_base_prog_key(stage, device, vue_layout);
stage->key.base.uses_inline_push_addr = true;
}
@ -462,13 +469,14 @@ populate_wm_prog_key(struct anv_pipeline_stage *stage,
const struct vk_multisample_state *ms,
const struct vk_fragment_shading_rate_state *fsr,
const struct vk_render_pass_state *rp,
const enum intel_sometimes is_mesh)
const enum intel_sometimes is_mesh,
const enum intel_vue_layout vue_layout)
{
const struct anv_device *device = pipeline->base.device;
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
populate_base_prog_key(stage, device, vue_layout);
struct brw_wm_prog_key *key = &stage->key.wm;
@ -553,7 +561,7 @@ populate_cs_prog_key(struct anv_pipeline_stage *stage,
{
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
populate_base_prog_key(stage, device, INTEL_VUE_LAYOUT_FIXED);
stage->key.base.uses_inline_push_addr = device->info->verx10 >= 125;
}
@ -565,7 +573,7 @@ populate_bs_prog_key(struct anv_pipeline_stage *stage,
{
memset(&stage->key, 0, sizeof(stage->key));
populate_base_prog_key(stage, device);
populate_base_prog_key(stage, device, INTEL_VUE_LAYOUT_FIXED);
stage->key.bs.pipeline_ray_flags = ray_flags;
@ -1159,7 +1167,7 @@ anv_pipeline_compile_vs(const struct brw_compiler *compiler,
brw_compute_vue_map(compiler->devinfo,
&vs_stage->prog_data.vs.base.vue_map,
vs_stage->nir->info.outputs_written,
vs_stage->nir->info.separate_shader,
vs_stage->key.base.vue_layout,
pos_slots);
vs_stage->num_stats = 1;
@ -1335,7 +1343,7 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler,
brw_compute_vue_map(compiler->devinfo,
&gs_stage->prog_data.gs.base.vue_map,
gs_stage->nir->info.outputs_written,
gs_stage->nir->info.separate_shader, 1);
gs_stage->key.base.vue_layout, 1);
gs_stage->num_stats = 1;
@ -1522,7 +1530,7 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler,
brw_compute_vue_map(compiler->devinfo,
&prev_vue_map,
fs_stage->nir->info.inputs_read,
fs_stage->nir->info.separate_shader,
fs_stage->key.base.vue_layout,
pos_slots);
fs_stage->key.wm.input_slots_valid = prev_vue_map.slots_valid;
@ -1742,6 +1750,16 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state,
struct anv_pipeline_stage *stages)
{
struct anv_device *device = pipeline->base.device;
enum intel_vue_layout vue_layout;
if ((pipeline->base.flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) ||
!device->vk.enabled_extensions.EXT_graphics_pipeline_library) {
vue_layout = INTEL_VUE_LAYOUT_FIXED;
} else {
vue_layout = INTEL_VUE_LAYOUT_SEPARATE;
}
for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
if (!anv_pipeline_base_has_stage(pipeline, s))
continue;
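The layout selection above reduces to the following (a plain restatement of the condition, not additional logic):

    EXT_graphics_pipeline_library   LINK_TIME_OPTIMIZATION flag   vue_layout
    disabled                        any                           INTEL_VUE_LAYOUT_FIXED
    enabled                         set                           INTEL_VUE_LAYOUT_FIXED
    enabled                         not set                       INTEL_VUE_LAYOUT_SEPARATE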
@ -1751,20 +1769,21 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
const struct anv_device *device = pipeline->base.device;
switch (stages[s].stage) {
case MESA_SHADER_VERTEX:
populate_vs_prog_key(&stages[s], device);
populate_vs_prog_key(&stages[s], device, vue_layout);
break;
case MESA_SHADER_TESS_CTRL:
populate_tcs_prog_key(&stages[s],
device,
BITSET_TEST(state->dynamic,
MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS) ?
0 : state->ts->patch_control_points);
0 : state->ts->patch_control_points,
vue_layout);
break;
case MESA_SHADER_TESS_EVAL:
populate_tes_prog_key(&stages[s], device);
populate_tes_prog_key(&stages[s], device, vue_layout);
break;
case MESA_SHADER_GEOMETRY:
populate_gs_prog_key(&stages[s], device);
populate_gs_prog_key(&stages[s], device, vue_layout);
break;
case MESA_SHADER_FRAGMENT: {
/* Assume rasterization enabled in any of the following case :
@ -1794,7 +1813,8 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
pipeline,
state->dynamic,
raster_enabled ? state->ms : NULL,
state->fsr, state->rp, is_mesh);
state->fsr, state->rp, is_mesh,
vue_layout);
break;
}
@ -1803,7 +1823,7 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
break;
case MESA_SHADER_MESH: {
populate_mesh_prog_key(&stages[s], device);
populate_mesh_prog_key(&stages[s], device, vue_layout);
break;
}
@ -2019,8 +2039,11 @@ anv_pipeline_nir_preprocess(struct anv_pipeline *pipeline,
};
NIR_PASS(_, stage->nir, nir_opt_access, &opt_access_options);
/* Vulkan uses the separate-shader linking model */
stage->nir->info.separate_shader = true;
/* Use a separate-shader linking model for pipeline libraries; we do
* cross-stage linking otherwise.
*/
stage->nir->info.separate_shader =
stage->key.base.vue_layout != INTEL_VUE_LAYOUT_FIXED;
struct brw_nir_compiler_opts opts = {
.softfp64 = device->fp64_nir,