diff --git a/src/gallium/drivers/crocus/crocus_program.c b/src/gallium/drivers/crocus/crocus_program.c index 9400380bdd2..7360734da29 100644 --- a/src/gallium/drivers/crocus/crocus_program.c +++ b/src/gallium/drivers/crocus/crocus_program.c @@ -1204,7 +1204,9 @@ crocus_compile_vs(struct crocus_context *ice, crocus_vs_outputs_written(ice, key, nir->info.outputs_written); elk_compute_vue_map(devinfo, &vue_prog_data->vue_map, outputs_written, - nir->info.separate_shader, /* pos slots */ 1); + nir->info.separate_shader ? + INTEL_VUE_LAYOUT_SEPARATE : + INTEL_VUE_LAYOUT_FIXED, /* pos slots */ 1); /* Don't tell the backend about our clip plane constants, we've already * lowered them in NIR and we don't want it doing it again. @@ -1694,7 +1696,9 @@ crocus_compile_gs(struct crocus_context *ice, elk_compute_vue_map(devinfo, &vue_prog_data->vue_map, nir->info.outputs_written, - nir->info.separate_shader, /* pos slots */ 1); + nir->info.separate_shader ? + INTEL_VUE_LAYOUT_SEPARATE : + INTEL_VUE_LAYOUT_FIXED, /* pos slots */ 1); if (devinfo->ver == 6) gfx6_gs_xfb_setup(&ish->stream_output, gs_prog_data); @@ -1969,7 +1973,7 @@ update_last_vue_map(struct crocus_context *ice, ice->state.stage_dirty_for_nos[CROCUS_NOS_LAST_VUE_MAP]; } - if (changed_slots || (old_map && old_map->separate != vue_map->separate)) { + if (changed_slots || (old_map && old_map->layout != vue_map->layout)) { ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE; if (devinfo->ver < 6) ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG; @@ -2872,7 +2876,7 @@ crocus_create_fs_state(struct pipe_context *ctx, if (devinfo->ver < 6) { elk_compute_vue_map(devinfo, &vue_map, info->inputs_read | VARYING_BIT_POS, - false, /* pos slots */ 1); + INTEL_VUE_LAYOUT_FIXED, /* pos slots */ 1); } if (!crocus_disk_cache_retrieve(ice, ish, &key, sizeof(key))) crocus_compile_fs(ice, ish, &key, &vue_map); diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 0c99a33973c..a8d0e7bb63c 100644 --- 
a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -231,7 +231,8 @@ struct iris_vue_prog_key { struct iris_base_prog_key base; unsigned nr_userclip_plane_consts:4; - unsigned padding:28; + enum intel_vue_layout layout:2; + unsigned padding:26; }; struct iris_vs_prog_key { @@ -284,7 +285,8 @@ struct iris_fs_prog_key { bool multisample_fbo:1; bool force_dual_color_blend:1; bool coherent_fb_fetch:1; - uint64_t padding:43; + enum intel_vue_layout vue_layout:2; + uint64_t padding:41; }; struct iris_cs_prog_key { diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 4af7b8e11e7..7f627f9ff61 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -55,12 +55,20 @@ #include "iris_pipe.h" #include "nir/tgsi_to_nir.h" -#define KEY_INIT(prefix) \ - .prefix.program_string_id = ish->program_id, \ - .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range -#define BRW_KEY_INIT(gen, prog_id, limit_trig_input) \ +static inline enum intel_vue_layout +vue_layout(bool separate_shader) +{ + return separate_shader ? 
INTEL_VUE_LAYOUT_SEPARATE : INTEL_VUE_LAYOUT_FIXED; +} + +#define KEY_INIT(prefix) \ + .prefix.program_string_id = ish->program_id, \ + .prefix.limit_trig_input_range = \ + screen->driconf.limit_trig_input_range +#define BRW_KEY_INIT(gen, prog_id, limit_trig_input, _vue_layout) \ .base.program_string_id = prog_id, \ - .base.limit_trig_input_range = limit_trig_input + .base.limit_trig_input_range = limit_trig_input, \ + .base.vue_layout = _vue_layout #ifdef INTEL_USE_ELK #define ELK_KEY_INIT(gen, prog_id, limit_trig_input) \ @@ -525,7 +533,8 @@ iris_to_brw_vs_key(const struct iris_screen *screen, { return (struct brw_vs_prog_key) { BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id, - key->vue.base.limit_trig_input_range), + key->vue.base.limit_trig_input_range, + key->vue.layout), }; } @@ -535,7 +544,8 @@ iris_to_brw_tcs_key(const struct iris_screen *screen, { return (struct brw_tcs_prog_key) { BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id, - key->vue.base.limit_trig_input_range), + key->vue.base.limit_trig_input_range, + key->vue.layout), ._tes_primitive_mode = key->_tes_primitive_mode, .input_vertices = key->input_vertices, .patch_outputs_written = key->patch_outputs_written, @@ -549,7 +559,8 @@ iris_to_brw_tes_key(const struct iris_screen *screen, { return (struct brw_tes_prog_key) { BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id, - key->vue.base.limit_trig_input_range), + key->vue.base.limit_trig_input_range, + key->vue.layout), .patch_inputs_read = key->patch_inputs_read, .inputs_read = key->inputs_read, }; @@ -561,7 +572,8 @@ iris_to_brw_gs_key(const struct iris_screen *screen, { return (struct brw_gs_prog_key) { BRW_KEY_INIT(screen->devinfo->ver, key->vue.base.program_string_id, - key->vue.base.limit_trig_input_range), + key->vue.base.limit_trig_input_range, + key->vue.layout), }; } @@ -571,7 +583,8 @@ iris_to_brw_fs_key(const struct iris_screen *screen, { return (struct brw_wm_prog_key) { 
BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id, - key->base.limit_trig_input_range), + key->base.limit_trig_input_range, + key->vue_layout), .nr_color_regions = key->nr_color_regions, .flat_shade = key->flat_shade, .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha, @@ -595,7 +608,8 @@ iris_to_brw_cs_key(const struct iris_screen *screen, { return (struct brw_cs_prog_key) { BRW_KEY_INIT(screen->devinfo->ver, key->base.program_string_id, - key->base.limit_trig_input_range), + key->base.limit_trig_input_range, + INTEL_VUE_LAYOUT_SEPARATE), }; } @@ -1884,7 +1898,7 @@ iris_compile_vs(struct iris_screen *screen, brw_compute_vue_map(devinfo, &brw_prog_data->base.vue_map, nir->info.outputs_written, - nir->info.separate_shader, /* pos_slots */ 1); + key->vue.layout, /* pos_slots */ 1); struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key); @@ -1916,7 +1930,9 @@ iris_compile_vs(struct iris_screen *screen, elk_compute_vue_map(devinfo, &elk_prog_data->base.vue_map, nir->info.outputs_written, - nir->info.separate_shader, /* pos_slots */ 1); + nir->info.separate_shader ? + INTEL_VUE_LAYOUT_SEPARATE : + INTEL_VUE_LAYOUT_FIXED, /* pos_slots */ 1); struct elk_vs_prog_key elk_key = iris_to_elk_vs_key(screen, key); @@ -1983,7 +1999,10 @@ iris_update_compiled_vs(struct iris_context *ice) struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[MESA_SHADER_VERTEX]; - struct iris_vs_prog_key key = { KEY_INIT(vue.base) }; + struct iris_vs_prog_key key = { + KEY_INIT(vue.base), + .vue.layout = vue_layout(ish->nir->info.separate_shader), + }; screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key); struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS]; @@ -2208,6 +2227,7 @@ iris_update_compiled_tcs(struct iris_context *ice) iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL); struct iris_tcs_prog_key key = { .vue.base.program_string_id = tcs ? tcs->program_id : 0, + .vue.layout = vue_layout(tcs ? 
tcs->nir->info.separate_shader : false), ._tes_primitive_mode = tes_info->tess._primitive_mode, .input_vertices = !tcs || iris_use_tcs_multi_patch(screen) ? ice->state.vertices_per_patch : 0, @@ -2416,7 +2436,10 @@ iris_update_compiled_tes(struct iris_context *ice) struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; - struct iris_tes_prog_key key = { KEY_INIT(vue.base) }; + struct iris_tes_prog_key key = { + KEY_INIT(vue.base), + .vue.layout = vue_layout(ish->nir->info.separate_shader), + }; get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read); screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key); @@ -2500,7 +2523,7 @@ iris_compile_gs(struct iris_screen *screen, brw_compute_vue_map(devinfo, &brw_prog_data->base.vue_map, nir->info.outputs_written, - nir->info.separate_shader, /* pos_slots */ 1); + key->vue.layout, /* pos_slots */ 1); struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key); @@ -2530,7 +2553,9 @@ iris_compile_gs(struct iris_screen *screen, elk_compute_vue_map(devinfo, &elk_prog_data->base.vue_map, nir->info.outputs_written, - nir->info.separate_shader, /* pos_slots */ 1); + nir->info.separate_shader ? 
+ INTEL_VUE_LAYOUT_SEPARATE : + INTEL_VUE_LAYOUT_FIXED, /* pos_slots */ 1); struct elk_gs_prog_key elk_key = iris_to_elk_gs_key(screen, key); @@ -2600,7 +2625,10 @@ iris_update_compiled_gs(struct iris_context *ice) struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; if (ish) { - struct iris_gs_prog_key key = { KEY_INIT(vue.base) }; + struct iris_gs_prog_key key = { + KEY_INIT(vue.base), + .vue.layout = vue_layout(ish->nir->info.separate_shader), + }; screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key); bool added; @@ -2777,7 +2805,10 @@ iris_update_compiled_fs(struct iris_context *ice) struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; - struct iris_fs_prog_key key = { KEY_INIT(base) }; + struct iris_fs_prog_key key = { + KEY_INIT(base), + .vue_layout = vue_layout(ish->nir->info.separate_shader), + }; screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key); struct intel_vue_map *last_vue_map = @@ -2847,7 +2878,7 @@ update_last_vue_map(struct iris_context *ice, ice->state.dirty |= IRIS_DIRTY_CLIP; } - if (changed_slots || (old_map && old_map->separate != vue_map->separate)) { + if (changed_slots || (old_map && old_map->layout != vue_map->layout)) { ice->state.dirty |= IRIS_DIRTY_SBE; } @@ -3429,13 +3460,17 @@ iris_create_shader_state(struct pipe_context *ctx, if (info->clip_distance_array_size == 0) ish->nos |= (1ull << IRIS_NOS_RASTERIZER); - key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) }; + key.vs = (struct iris_vs_prog_key) { + KEY_INIT(vue.base), + .vue.layout = vue_layout(ish->nir->info.separate_shader), + }; key_size = sizeof(key.vs); break; case MESA_SHADER_TESS_CTRL: { key.tcs = (struct iris_tcs_prog_key) { KEY_INIT(vue.base), + .vue.layout = vue_layout(ish->nir->info.separate_shader), // XXX: make sure the linker fills this out from the TES... ._tes_primitive_mode = info->tess._primitive_mode ? 
info->tess._primitive_mode @@ -3463,6 +3498,7 @@ iris_create_shader_state(struct pipe_context *ctx, key.tes = (struct iris_tes_prog_key) { KEY_INIT(vue.base), + .vue.layout = vue_layout(ish->nir->info.separate_shader), // XXX: not ideal, need TCS output/TES input unification .inputs_read = info->inputs_read, .patch_inputs_read = info->patch_inputs_read, @@ -3474,7 +3510,10 @@ iris_create_shader_state(struct pipe_context *ctx, case MESA_SHADER_GEOMETRY: ish->nos |= (1ull << IRIS_NOS_RASTERIZER); - key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) }; + key.gs = (struct iris_gs_prog_key) { + KEY_INIT(vue.base), + .vue.layout = vue_layout(ish->nir->info.separate_shader), + }; key_size = sizeof(key.gs); break; @@ -3505,6 +3544,7 @@ iris_create_shader_state(struct pipe_context *ctx, key.fs = (struct iris_fs_prog_key) { KEY_INIT(base), + .vue_layout = vue_layout(ish->nir->info.separate_shader), .nr_color_regions = util_bitcount(color_outputs), .coherent_fb_fetch = devinfo->ver >= 9 && devinfo->ver < 20, .input_slots_valid = diff --git a/src/intel/blorp/blorp_elk.c b/src/intel/blorp/blorp_elk.c index cac5315278b..d9b6c390144 100644 --- a/src/intel/blorp/blorp_elk.c +++ b/src/intel/blorp/blorp_elk.c @@ -91,7 +91,9 @@ blorp_compile_vs_elk(struct blorp_context *blorp, void *mem_ctx, elk_compute_vue_map(compiler->devinfo, &vs_prog_data->base.vue_map, nir->info.outputs_written, - nir->info.separate_shader, + nir->info.separate_shader ? 
+ INTEL_VUE_LAYOUT_SEPARATE : + INTEL_VUE_LAYOUT_FIXED, 1); struct elk_vs_prog_key vs_key = { 0, }; @@ -231,7 +233,8 @@ blorp_ensure_sf_program_elk(struct blorp_batch *batch, unsigned program_size; struct intel_vue_map vue_map; - elk_compute_vue_map(compiler->devinfo, &vue_map, slots_valid, false, 1); + elk_compute_vue_map(compiler->devinfo, &vue_map, slots_valid, + INTEL_VUE_LAYOUT_FIXED, 1); struct elk_sf_prog_data prog_data_tmp; program = elk_compile_sf(compiler, mem_ctx, &key.key, diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index f7518d370b7..0ad3c9cfcab 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -825,7 +825,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo, struct intel_vue_map prev_stage_vue_map; brw_compute_vue_map(devinfo, &prev_stage_vue_map, key->input_slots_valid, - nir->info.separate_shader, 1); + key->base.vue_layout, 1); int first_slot = brw_compute_first_fs_urb_slot_required(unique_fs_attrs, diff --git a/src/intel/compiler/brw_compile_gs.cpp b/src/intel/compiler/brw_compile_gs.cpp index 3161a5845fa..b0c0b11a2a3 100644 --- a/src/intel/compiler/brw_compile_gs.cpp +++ b/src/intel/compiler/brw_compile_gs.cpp @@ -161,7 +161,7 @@ brw_compile_gs(const struct brw_compiler *compiler, GLbitfield64 inputs_read = nir->info.inputs_read; brw_compute_vue_map(compiler->devinfo, &input_vue_map, inputs_read, - nir->info.separate_shader, 1); + key->base.vue_layout, 1); brw_nir_apply_key(nir, compiler, &key->base, dispatch_width); brw_nir_lower_vue_inputs(nir, &input_vue_map); diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp index 87337412b5d..82562e358a6 100644 --- a/src/intel/compiler/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw_compile_mesh.cpp @@ -1662,12 +1662,9 @@ brw_compile_mesh(const struct brw_compiler *compiler, brw_nir_lower_tue_inputs(nir, params->tue_map); - /* Incorrectly set separate to 
false until we fix the anv/brw in the next - * commit. - */ brw_compute_mue_map(compiler, nir, &prog_data->map, prog_data->index_format, - false /* TODO: use nir->info.separate_shader */); + key->base.vue_layout); brw_nir_lower_mue_outputs(nir, &prog_data->map); prog_data->autostrip_enable = brw_mesh_autostrip_enable(compiler, nir, &prog_data->map); diff --git a/src/intel/compiler/brw_compile_tcs.cpp b/src/intel/compiler/brw_compile_tcs.cpp index c32f6f96a5f..96a51bb97e1 100644 --- a/src/intel/compiler/brw_compile_tcs.cpp +++ b/src/intel/compiler/brw_compile_tcs.cpp @@ -200,7 +200,7 @@ brw_compile_tcs(const struct brw_compiler *compiler, struct intel_vue_map input_vue_map; brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read, - nir->info.separate_shader, 1); + key->base.vue_layout, 1); brw_compute_tess_vue_map(&vue_prog_data->vue_map, nir->info.outputs_written, nir->info.patch_outputs_written); diff --git a/src/intel/compiler/brw_compile_tes.cpp b/src/intel/compiler/brw_compile_tes.cpp index 484f7bcf5c2..d6f13baae0d 100644 --- a/src/intel/compiler/brw_compile_tes.cpp +++ b/src/intel/compiler/brw_compile_tes.cpp @@ -84,7 +84,7 @@ brw_compile_tes(const struct brw_compiler *compiler, brw_compute_vue_map(devinfo, &prog_data->base.vue_map, nir->info.outputs_written, - nir->info.separate_shader, 1); + key->base.vue_layout, 1); unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index b56bef64337..b45f0eb0393 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -210,14 +210,16 @@ struct brw_base_prog_key { bool uses_inline_push_addr:1; - unsigned padding:21; + enum intel_vue_layout vue_layout:2; /** * Apply workarounds for SIN and COS input range problems. * This limits input range for SIN and COS to [-2p : 2p] to * avoid precision issues. 
*/ - bool limit_trig_input_range; + bool limit_trig_input_range:1; + + unsigned padding:26; }; /** @@ -1070,7 +1072,7 @@ brw_varying_to_offset(const struct intel_vue_map *vue_map, unsigned varying) void brw_compute_vue_map(const struct intel_device_info *devinfo, struct intel_vue_map *vue_map, uint64_t slots_valid, - bool separate_shader, + enum intel_vue_layout layout, uint32_t pos_slots); void brw_compute_tess_vue_map(struct intel_vue_map *const vue_map, diff --git a/src/intel/compiler/brw_vue_map.c b/src/intel/compiler/brw_vue_map.c index 1cb5774c350..922c208df8e 100644 --- a/src/intel/compiler/brw_vue_map.c +++ b/src/intel/compiler/brw_vue_map.c @@ -60,10 +60,10 @@ void brw_compute_vue_map(const struct intel_device_info *devinfo, struct intel_vue_map *vue_map, uint64_t slots_valid, - bool separate, + enum intel_vue_layout layout, uint32_t pos_slots) { - if (separate) { + if (layout != INTEL_VUE_LAYOUT_FIXED) { /* In SSO mode, we don't know whether the adjacent stage will * read/write gl_ClipDistance, which has a fixed slot location. * We have to assume the worst and reserve a slot for it, or else @@ -77,7 +77,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo, } vue_map->slots_valid = slots_valid; - vue_map->separate = separate; + vue_map->layout = layout; /* gl_Layer, gl_ViewportIndex & gl_PrimitiveShadingRateEXT don't get their * own varying slots -- they are stored in the first VUE slot @@ -177,7 +177,7 @@ brw_compute_vue_map(const struct intel_device_info *devinfo, uint64_t generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0); while (generics != 0) { const int varying = ffsll(generics) - 1; - if (separate) { + if (layout != INTEL_VUE_LAYOUT_FIXED) { slot = first_generic_slot + varying - VARYING_SLOT_VAR0; } assign_vue_slot(vue_map, varying, slot++); @@ -202,8 +202,10 @@ brw_compute_tess_vue_map(struct intel_vue_map *vue_map, /* I don't think anything actually uses this... 
*/ vue_map->slots_valid = vertex_slots; - /* separate isn't really meaningful, but make sure it's initialized */ - vue_map->separate = false; + /* separate isn't really meaningful, we always compile tessellation + * shaders together, so use a fixed layout. + */ + vue_map->layout = INTEL_VUE_LAYOUT_FIXED; vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER); @@ -278,12 +280,17 @@ void brw_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map, gl_shader_stage stage) { + const char *layout_name = + vue_map->layout == INTEL_VUE_LAYOUT_FIXED ? "fixed" : + vue_map->layout == INTEL_VUE_LAYOUT_SEPARATE ? "separate" : + "separate-mesh"; + if (vue_map->num_per_vertex_slots > 0 || vue_map->num_per_patch_slots > 0) { fprintf(fp, "PUE map (%d slots, %d/patch, %d/vertex, %s)\n", vue_map->num_slots, vue_map->num_per_patch_slots, vue_map->num_per_vertex_slots, - vue_map->separate ? "SSO" : "non-SSO"); + layout_name); for (int i = 0; i < vue_map->num_slots; i++) { if (vue_map->slot_to_varying[i] >= VARYING_SLOT_PATCH0) { fprintf(fp, " [%02d] VARYING_SLOT_PATCH%d\n", i, @@ -295,8 +302,7 @@ brw_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map, } } else { fprintf(fp, "%s VUE map (%d slots, %s)\n", - gl_shader_stage_name(stage), - vue_map->num_slots, vue_map->separate ? 
"SSO" : "non-SSO"); + gl_shader_stage_name(stage), vue_map->num_slots, layout_name); for (int i = 0; i < vue_map->num_slots; i++) { fprintf(fp, " [%02d] %s\n", i, varying_name(vue_map->slot_to_varying[i], stage)); diff --git a/src/intel/compiler/elk/elk_compiler.h b/src/intel/compiler/elk/elk_compiler.h index 5162723e674..0f8f658b1f7 100644 --- a/src/intel/compiler/elk/elk_compiler.h +++ b/src/intel/compiler/elk/elk_compiler.h @@ -1215,7 +1215,7 @@ elk_varying_to_offset(const struct intel_vue_map *vue_map, unsigned varying) void elk_compute_vue_map(const struct intel_device_info *devinfo, struct intel_vue_map *vue_map, uint64_t slots_valid, - bool separate_shader, + enum intel_vue_layout layout, uint32_t pos_slots); void elk_compute_tess_vue_map(struct intel_vue_map *const vue_map, diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp index 530f04568f5..0a6a6627459 100644 --- a/src/intel/compiler/elk/elk_fs.cpp +++ b/src/intel/compiler/elk/elk_fs.cpp @@ -1424,7 +1424,9 @@ calculate_urb_setup(const struct intel_device_info *devinfo, struct intel_vue_map prev_stage_vue_map; elk_compute_vue_map(devinfo, &prev_stage_vue_map, key->input_slots_valid, - nir->info.separate_shader, 1); + nir->info.separate_shader ? + INTEL_VUE_LAYOUT_SEPARATE : + INTEL_VUE_LAYOUT_FIXED, 1); int first_slot = elk_compute_first_urb_slot_required(inputs_read, diff --git a/src/intel/compiler/elk/elk_shader.cpp b/src/intel/compiler/elk/elk_shader.cpp index affaf16eeed..05c0a3b3032 100644 --- a/src/intel/compiler/elk/elk_shader.cpp +++ b/src/intel/compiler/elk/elk_shader.cpp @@ -1276,7 +1276,9 @@ elk_compile_tes(const struct elk_compiler *compiler, elk_compute_vue_map(devinfo, &prog_data->base.vue_map, nir->info.outputs_written, - nir->info.separate_shader, 1); + nir->info.separate_shader ? 
+ INTEL_VUE_LAYOUT_SEPARATE : + INTEL_VUE_LAYOUT_FIXED, 1); unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; diff --git a/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp b/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp index 21867747b4e..06cc7b8ab74 100644 --- a/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp +++ b/src/intel/compiler/elk/elk_vec4_gs_visitor.cpp @@ -610,7 +610,9 @@ elk_compile_gs(const struct elk_compiler *compiler, GLbitfield64 inputs_read = nir->info.inputs_read; elk_compute_vue_map(compiler->devinfo, &c.input_vue_map, inputs_read, - nir->info.separate_shader, 1); + nir->info.separate_shader ? + INTEL_VUE_LAYOUT_SEPARATE : + INTEL_VUE_LAYOUT_FIXED, 1); elk_nir_apply_key(nir, compiler, &key->base, 8); elk_nir_lower_vue_inputs(nir, &c.input_vue_map); diff --git a/src/intel/compiler/elk/elk_vec4_tcs.cpp b/src/intel/compiler/elk/elk_vec4_tcs.cpp index 673af595c58..1772670f255 100644 --- a/src/intel/compiler/elk/elk_vec4_tcs.cpp +++ b/src/intel/compiler/elk/elk_vec4_tcs.cpp @@ -373,7 +373,9 @@ elk_compile_tcs(const struct elk_compiler *compiler, struct intel_vue_map input_vue_map; elk_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read, - nir->info.separate_shader, 1); + nir->info.separate_shader ? 
+ INTEL_VUE_LAYOUT_SEPARATE : + INTEL_VUE_LAYOUT_FIXED, 1); elk_compute_tess_vue_map(&vue_prog_data->vue_map, nir->info.outputs_written, nir->info.patch_outputs_written); diff --git a/src/intel/compiler/elk/elk_vue_map.c b/src/intel/compiler/elk/elk_vue_map.c index bb8f751ce83..e119b2e31db 100644 --- a/src/intel/compiler/elk/elk_vue_map.c +++ b/src/intel/compiler/elk/elk_vue_map.c @@ -60,17 +60,20 @@ void elk_compute_vue_map(const struct intel_device_info *devinfo, struct intel_vue_map *vue_map, uint64_t slots_valid, - bool separate, + enum intel_vue_layout layout, uint32_t pos_slots) { + assert(layout == INTEL_VUE_LAYOUT_FIXED || + layout == INTEL_VUE_LAYOUT_SEPARATE); + /* Keep using the packed/contiguous layout on old hardware - we only need * the SSO layout when using geometry/tessellation shaders or 32 FS input * varyings, which only exist on Gen >= 6. It's also a bit more efficient. */ if (devinfo->ver < 6) - separate = false; + layout = INTEL_VUE_LAYOUT_FIXED; - if (separate) { + if (layout == INTEL_VUE_LAYOUT_SEPARATE) { /* In SSO mode, we don't know whether the adjacent stage will * read/write gl_ClipDistance, which has a fixed slot location. 
* We have to assume the worst and reserve a slot for it, or else @@ -84,7 +87,7 @@ elk_compute_vue_map(const struct intel_device_info *devinfo, } vue_map->slots_valid = slots_valid; - vue_map->separate = separate; + vue_map->layout = layout; /* gl_Layer, gl_ViewportIndex & gl_PrimitiveShadingRateEXT don't get their * own varying slots -- they are stored in the first VUE slot @@ -198,7 +201,7 @@ elk_compute_vue_map(const struct intel_device_info *devinfo, uint64_t generics = slots_valid & ~BITFIELD64_MASK(VARYING_SLOT_VAR0); while (generics != 0) { const int varying = ffsll(generics) - 1; - if (separate) { + if (layout == INTEL_VUE_LAYOUT_SEPARATE) { slot = first_generic_slot + varying - VARYING_SLOT_VAR0; } assign_vue_slot(vue_map, varying, slot++); @@ -224,7 +227,7 @@ elk_compute_tess_vue_map(struct intel_vue_map *vue_map, vue_map->slots_valid = vertex_slots; /* separate isn't really meaningful, but make sure it's initialized */ - vue_map->separate = false; + vue_map->layout = INTEL_VUE_LAYOUT_FIXED; vertex_slots &= ~(VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER); @@ -301,12 +304,15 @@ void elk_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map, gl_shader_stage stage) { + const char *layout_name = + vue_map->layout == INTEL_VUE_LAYOUT_FIXED ? "non-SSO" : "SSO"; + if (vue_map->num_per_vertex_slots > 0 || vue_map->num_per_patch_slots > 0) { fprintf(fp, "PUE map (%d slots, %d/patch, %d/vertex, %s)\n", vue_map->num_slots, vue_map->num_per_patch_slots, vue_map->num_per_vertex_slots, - vue_map->separate ? "SSO" : "non-SSO"); + layout_name); for (int i = 0; i < vue_map->num_slots; i++) { if (vue_map->slot_to_varying[i] >= VARYING_SLOT_PATCH0) { fprintf(fp, " [%d] VARYING_SLOT_PATCH%d\n", i, @@ -317,8 +323,7 @@ elk_print_vue_map(FILE *fp, const struct intel_vue_map *vue_map, } } } else { - fprintf(fp, "VUE map (%d slots, %s)\n", - vue_map->num_slots, vue_map->separate ? 
"SSO" : "non-SSO"); + fprintf(fp, "VUE map (%d slots, %s)\n", vue_map->num_slots, layout_name); for (int i = 0; i < vue_map->num_slots; i++) { fprintf(fp, " [%d] %s\n", i, varying_name(vue_map->slot_to_varying[i], stage)); diff --git a/src/intel/compiler/intel_shader_enums.h b/src/intel/compiler/intel_shader_enums.h index 15a03ba781d..91255da816c 100644 --- a/src/intel/compiler/intel_shader_enums.h +++ b/src/intel/compiler/intel_shader_enums.h @@ -122,6 +122,19 @@ enum intel_barycentric_mode { (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \ (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE)) +enum intel_vue_layout { + /** + * Layout is fixed and shared by producer/consumer, allowing for tight + * packing + */ + INTEL_VUE_LAYOUT_FIXED = 0, + /** + * Layout is separate, works for ARB_separate_shader_objects but without + * Mesh support. + */ + INTEL_VUE_LAYOUT_SEPARATE, +}; + /** * Data structure recording the relationship between the gl_varying_slot enum * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a @@ -142,7 +155,7 @@ struct intel_vue_map { uint64_t slots_valid; /** - * Is this VUE map for a separate shader pipeline? + * The layout of the VUE * * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched * without the linker having a chance to dead code eliminate unused varyings. @@ -150,7 +163,7 @@ struct intel_vue_map { * This means that we have to use a fixed slot layout, based on the output's * location field, rather than assigning slots in a compact contiguous block. */ - bool separate; + enum intel_vue_layout layout; /** * Map from gl_varying_slot value to VUE slot. 
For gl_varying_slots that are diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 3ff44b435f8..44629946981 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -316,20 +316,23 @@ anv_get_robust_flags(const struct vk_pipeline_robustness_state *rstate) static void populate_base_prog_key(struct anv_pipeline_stage *stage, - const struct anv_device *device) + const struct anv_device *device, + const enum intel_vue_layout vue_layout) { stage->key.base.robust_flags = anv_get_robust_flags(&stage->rstate); + stage->key.base.vue_layout = vue_layout; stage->key.base.limit_trig_input_range = device->physical->instance->limit_trig_input_range; } static void populate_vs_prog_key(struct anv_pipeline_stage *stage, - const struct anv_device *device) + const struct anv_device *device, + const enum intel_vue_layout vue_layout) { memset(&stage->key, 0, sizeof(stage->key)); - populate_base_prog_key(stage, device); + populate_base_prog_key(stage, device, vue_layout); stage->key.vs.vf_component_packing = device->physical->instance->vf_component_packing; @@ -338,31 +341,34 @@ populate_vs_prog_key(struct anv_pipeline_stage *stage, static void populate_tcs_prog_key(struct anv_pipeline_stage *stage, const struct anv_device *device, - unsigned input_vertices) + unsigned input_vertices, + const enum intel_vue_layout vue_layout) { memset(&stage->key, 0, sizeof(stage->key)); - populate_base_prog_key(stage, device); + populate_base_prog_key(stage, device, vue_layout); stage->key.tcs.input_vertices = input_vertices; } static void populate_tes_prog_key(struct anv_pipeline_stage *stage, - const struct anv_device *device) + const struct anv_device *device, + const enum intel_vue_layout vue_layout) { memset(&stage->key, 0, sizeof(stage->key)); - populate_base_prog_key(stage, device); + populate_base_prog_key(stage, device, vue_layout); } static void populate_gs_prog_key(struct anv_pipeline_stage *stage, - const struct anv_device 
*device) + const struct anv_device *device, + const enum intel_vue_layout vue_layout) { memset(&stage->key, 0, sizeof(stage->key)); - populate_base_prog_key(stage, device); + populate_base_prog_key(stage, device, vue_layout); } static bool @@ -424,18 +430,19 @@ populate_task_prog_key(struct anv_pipeline_stage *stage, { memset(&stage->key, 0, sizeof(stage->key)); - populate_base_prog_key(stage, device); + populate_base_prog_key(stage, device, INTEL_VUE_LAYOUT_FIXED); stage->key.base.uses_inline_push_addr = true; } static void populate_mesh_prog_key(struct anv_pipeline_stage *stage, - const struct anv_device *device) + const struct anv_device *device, + const enum intel_vue_layout vue_layout) { memset(&stage->key, 0, sizeof(stage->key)); - populate_base_prog_key(stage, device); + populate_base_prog_key(stage, device, vue_layout); stage->key.base.uses_inline_push_addr = true; } @@ -462,13 +469,14 @@ populate_wm_prog_key(struct anv_pipeline_stage *stage, const struct vk_multisample_state *ms, const struct vk_fragment_shading_rate_state *fsr, const struct vk_render_pass_state *rp, - const enum intel_sometimes is_mesh) + const enum intel_sometimes is_mesh, + const enum intel_vue_layout vue_layout) { const struct anv_device *device = pipeline->base.device; memset(&stage->key, 0, sizeof(stage->key)); - populate_base_prog_key(stage, device); + populate_base_prog_key(stage, device, vue_layout); struct brw_wm_prog_key *key = &stage->key.wm; @@ -553,7 +561,7 @@ populate_cs_prog_key(struct anv_pipeline_stage *stage, { memset(&stage->key, 0, sizeof(stage->key)); - populate_base_prog_key(stage, device); + populate_base_prog_key(stage, device, INTEL_VUE_LAYOUT_FIXED); stage->key.base.uses_inline_push_addr = device->info->verx10 >= 125; } @@ -565,7 +573,7 @@ populate_bs_prog_key(struct anv_pipeline_stage *stage, { memset(&stage->key, 0, sizeof(stage->key)); - populate_base_prog_key(stage, device); + populate_base_prog_key(stage, device, INTEL_VUE_LAYOUT_FIXED); 
stage->key.bs.pipeline_ray_flags = ray_flags; stage->key.bs.pipeline_ray_flags = ray_flags; @@ -1159,7 +1167,7 @@ anv_pipeline_compile_vs(const struct brw_compiler *compiler, brw_compute_vue_map(compiler->devinfo, &vs_stage->prog_data.vs.base.vue_map, vs_stage->nir->info.outputs_written, - vs_stage->nir->info.separate_shader, + vs_stage->key.base.vue_layout, pos_slots); vs_stage->num_stats = 1; @@ -1335,7 +1343,7 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler, brw_compute_vue_map(compiler->devinfo, &gs_stage->prog_data.gs.base.vue_map, gs_stage->nir->info.outputs_written, - gs_stage->nir->info.separate_shader, 1); + gs_stage->key.base.vue_layout, 1); gs_stage->num_stats = 1; @@ -1522,7 +1530,7 @@ anv_pipeline_compile_fs(const struct brw_compiler *compiler, brw_compute_vue_map(compiler->devinfo, &prev_vue_map, fs_stage->nir->info.inputs_read, - fs_stage->nir->info.separate_shader, + fs_stage->key.base.vue_layout, pos_slots); fs_stage->key.wm.input_slots_valid = prev_vue_map.slots_valid; @@ -1742,6 +1750,16 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline, const struct vk_graphics_pipeline_state *state, struct anv_pipeline_stage *stages) { + struct anv_device *device = pipeline->base.device; + enum intel_vue_layout vue_layout; + + if ((pipeline->base.flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) || + !device->vk.enabled_extensions.EXT_graphics_pipeline_library) { + vue_layout = INTEL_VUE_LAYOUT_FIXED; + } else { + vue_layout = INTEL_VUE_LAYOUT_SEPARATE; + } + for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) { if (!anv_pipeline_base_has_stage(pipeline, s)) continue; @@ -1751,20 +1769,21 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline, const struct anv_device *device = pipeline->base.device; switch (stages[s].stage) { case MESA_SHADER_VERTEX: - populate_vs_prog_key(&stages[s], device); + populate_vs_prog_key(&stages[s], device, vue_layout); break; case 
MESA_SHADER_TESS_CTRL: populate_tcs_prog_key(&stages[s], device, BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS) ? - 0 : state->ts->patch_control_points); + 0 : state->ts->patch_control_points, + vue_layout); break; case MESA_SHADER_TESS_EVAL: - populate_tes_prog_key(&stages[s], device); + populate_tes_prog_key(&stages[s], device, vue_layout); break; case MESA_SHADER_GEOMETRY: - populate_gs_prog_key(&stages[s], device); + populate_gs_prog_key(&stages[s], device, vue_layout); break; case MESA_SHADER_FRAGMENT: { /* Assume rasterization enabled in any of the following case : @@ -1794,7 +1813,8 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline, pipeline, state->dynamic, raster_enabled ? state->ms : NULL, - state->fsr, state->rp, is_mesh); + state->fsr, state->rp, is_mesh, + vue_layout); break; } @@ -1803,7 +1823,7 @@ anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline, break; case MESA_SHADER_MESH: { - populate_mesh_prog_key(&stages[s], device); + populate_mesh_prog_key(&stages[s], device, vue_layout); break; } @@ -2019,8 +2039,11 @@ anv_pipeline_nir_preprocess(struct anv_pipeline *pipeline, }; NIR_PASS(_, stage->nir, nir_opt_access, &opt_access_options); - /* Vulkan uses the separate-shader linking model */ - stage->nir->info.separate_shader = true; + /* Use a separate-shader linking model for pipeline libraries, we do cross + * stage linking otherwise. + */ + stage->nir->info.separate_shader = + stage->key.base.vue_layout != INTEL_VUE_LAYOUT_FIXED; struct brw_nir_compiler_opts opts = { .softfp64 = device->fp64_nir,