diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 8f18388da68..592c63adaab 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -5179,513 +5179,6 @@ zink_flat_flags(struct nir_shader *shader) return flat_flags; } -struct rework_io_state { - /* these are search criteria */ - bool indirect_only; - unsigned location; - nir_variable_mode mode; - mesa_shader_stage stage; - nir_shader *nir; - const char *name; - - /* these are found by scanning */ - bool arrayed_io; - bool medium_precision; - bool fb_fetch_output; - bool dual_source_blend_index; - uint32_t component_mask; - uint32_t ignored_component_mask; - unsigned array_size; - unsigned bit_size; - unsigned base; - nir_alu_type type; - /* must be last */ - char *newname; -}; - -/* match an existing variable against the rework state */ -static nir_variable * -find_rework_var(nir_shader *nir, struct rework_io_state *ris) -{ - nir_foreach_variable_with_modes(var, nir, ris->mode) { - const struct glsl_type *type = var->type; - if (nir_is_arrayed_io(var, nir->info.stage)) - type = glsl_get_array_element(type); - if (var->data.fb_fetch_output != ris->fb_fetch_output) - continue; - if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out && ris->dual_source_blend_index != var->data.index) - continue; - unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false); - if (var->data.location > ris->location + ris->array_size || var->data.location + num_slots <= ris->location) - continue; - unsigned num_components = glsl_get_vector_elements(glsl_without_array(type)); - assert(!glsl_type_contains_64bit(type)); - uint32_t component_mask = ris->component_mask ? ris->component_mask : BITFIELD_MASK(4); - if (BITFIELD_RANGE(var->data.location_frac, num_components) & component_mask) - return var; - } - return NULL; -} - -static void -update_io_var_name(struct rework_io_state *ris, const char *name) -{ - if (!(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))) - return; - if (!name) - return; - if (ris->name && !strcmp(ris->name, name)) - return; - if (ris->newname && !strcmp(ris->newname, name)) - return; - if (ris->newname) { - ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->newname, name); - } else if (ris->name) { - ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->name, name); - } else { - ris->newname = ralloc_strdup(ris->nir, name); - } -} - -/* check/update tracking state for variable info */ -static void -update_io_var_state(nir_intrinsic_instr *intr, struct rework_io_state *ris) -{ - bool is_load = false; - bool is_input = false; - bool is_interp = false; - filter_io_instr(intr, &is_load, &is_input, &is_interp); - nir_io_semantics sem = nir_intrinsic_io_semantics(intr); - unsigned frac = nir_intrinsic_component(intr); - /* the mask of components for the instruction */ - uint32_t cmask = is_load ? BITFIELD_RANGE(frac, intr->num_components) : (nir_intrinsic_write_mask(intr) << frac); - - /* always check for existing variables first */ - struct rework_io_state test = { - .location = ris->location, - .mode = ris->mode, - .stage = ris->stage, - .arrayed_io = io_instr_is_arrayed(intr), - .medium_precision = sem.medium_precision, - .fb_fetch_output = sem.fb_fetch_output, - .dual_source_blend_index = sem.dual_source_blend_index, - .component_mask = cmask, - .array_size = sem.num_slots > 1 ? sem.num_slots : 0, - }; - if (find_rework_var(ris->nir, &test)) - return; - - /* filter ignored components to scan later: - * - ignore no-overlapping-components case - * - always match fbfetch and dual src blend - */ - if (ris->component_mask && - (!(ris->component_mask & cmask) || ris->fb_fetch_output != sem.fb_fetch_output || ris->dual_source_blend_index != sem.dual_source_blend_index)) { - ris->ignored_component_mask |= cmask; - return; - } - - assert(!ris->indirect_only || sem.num_slots > 1); - if (sem.num_slots > 1) - ris->array_size = MAX2(ris->array_size, sem.num_slots); - - assert(!ris->component_mask || ris->arrayed_io == io_instr_is_arrayed(intr)); - ris->arrayed_io = io_instr_is_arrayed(intr); - - ris->component_mask |= cmask; - - unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]); - assert(!ris->bit_size || ris->bit_size == bit_size); - ris->bit_size = bit_size; - - nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr); - if (ris->type) { - /* in the case of clashing types, this heuristic guarantees some semblance of a match */ - if (ris->type & nir_type_float || type & nir_type_float) { - ris->type = nir_type_float | bit_size; - } else if (ris->type & nir_type_int || type & nir_type_int) { - ris->type = nir_type_int | bit_size; - } else if (ris->type & nir_type_uint || type & nir_type_uint) { - ris->type = nir_type_uint | bit_size; - } else { - assert(bit_size == 1); - ris->type = nir_type_bool; - } - } else { - ris->type = type; - } - - update_io_var_name(ris, intr->name); - - ris->medium_precision |= sem.medium_precision; - ris->fb_fetch_output |= sem.fb_fetch_output; - ris->dual_source_blend_index |= sem.dual_source_blend_index; - if (ris->stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) - ris->base = nir_intrinsic_base(intr); -} - -/* instruction-level scanning for variable data */ -static bool -scan_io_var_usage(nir_builder *b, nir_intrinsic_instr *intr, void *data) -{ - struct rework_io_state *ris = data; - bool is_load = false; - bool is_input = false; - bool is_interp = false; - /* mode-based filtering */ - if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) - return false; - if (ris->mode == nir_var_shader_in) { - if (!is_input) - return false; - } else { - if (is_input) - return false; - } - /* location-based filtering */ - nir_io_semantics sem = nir_intrinsic_io_semantics(intr); - if (sem.location != ris->location && (ris->location > sem.location || ris->location + ris->array_size <= sem.location)) - return false; - - /* only scan indirect i/o when indirect_only is set */ - nir_src *src_offset = nir_get_io_offset_src(intr); - if (!nir_src_is_const(*src_offset)) { - if (!ris->indirect_only) - return false; - update_io_var_state(intr, ris); - return false; - } - - /* don't scan direct i/o when indirect_only is set */ - if (ris->indirect_only) - return false; - - update_io_var_state(intr, ris); - return false; -} - -/* scan a given i/o slot for state info */ -static struct rework_io_state -scan_io_var_slot(nir_shader *nir, nir_variable_mode mode, unsigned location, bool scan_indirects) -{ - struct rework_io_state ris = { - .location = location, - .mode = mode, - .stage = nir->info.stage, - .nir = nir, - }; - - struct rework_io_state test; - do { - update_io_var_name(&test, ris.newname ? ris.newname : ris.name); - test = ris; - /* always run indirect scan first to detect potential overlaps */ - if (scan_indirects) { - ris.indirect_only = true; - nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris); - } - ris.indirect_only = false; - nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris); - /* keep scanning until no changes found */ - } while (memcmp(&ris, &test, offsetof(struct rework_io_state, newname))); - return ris; -} - -/* create a variable using explicit/scan info */ -static void -create_io_var(nir_shader *nir, struct rework_io_state *ris) -{ - char name[1024]; - assert(ris->component_mask); - if (ris->newname || ris->name) { - snprintf(name, sizeof(name), "%s", ris->newname ? ris->newname : ris->name); - /* always use builtin name where possible */ - } else if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) { - snprintf(name, sizeof(name), "%s", gl_vert_attrib_name(ris->location)); - } else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out) { - snprintf(name, sizeof(name), "%s", gl_frag_result_name(ris->location)); - } else if (nir_slot_is_sysval_output(ris->location, nir->info.stage)) { - snprintf(name, sizeof(name), "%s", gl_varying_slot_name_for_stage(ris->location, nir->info.stage)); - } else { - int c = ffs(ris->component_mask) - 1; - if (c) - snprintf(name, sizeof(name), "slot_%u_c%u", ris->location, c); - else - snprintf(name, sizeof(name), "slot_%u", ris->location); - } - /* calculate vec/array type */ - int frac = ffs(ris->component_mask) - 1; - int num_components = util_last_bit(ris->component_mask) - frac; - assert(ris->component_mask == BITFIELD_RANGE(frac, num_components)); - const struct glsl_type *vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(ris->type), num_components); - if (ris->array_size) - vec_type = glsl_array_type(vec_type, ris->array_size, glsl_get_explicit_stride(vec_type)); - if (ris->arrayed_io) { - /* tess size may be unknown with generated tcs */ - unsigned arrayed = nir->info.stage == MESA_SHADER_GEOMETRY ? - nir->info.gs.vertices_in : - nir->info.stage == MESA_SHADER_MESH ? - nir->info.mesh.max_primitives_out : - 32 /* MAX_PATCH_VERTICES */; - vec_type = glsl_array_type(vec_type, arrayed, glsl_get_explicit_stride(vec_type)); - } - nir_variable *var = nir_variable_create(nir, ris->mode, vec_type, name); - var->data.location_frac = frac; - var->data.location = ris->location; - /* gallium vertex inputs use intrinsic 'base' indexing */ - if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) - var->data.driver_location = ris->base; - bool is_tess_level = (nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) && - (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER); - var->data.patch = ris->location >= VARYING_SLOT_PATCH0 || is_tess_level; - /* set flat by default: add_derefs will fill this in later after more shader passes */ - if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in) - var->data.interpolation = INTERP_MODE_FLAT; - var->data.fb_fetch_output = ris->fb_fetch_output; - var->data.index = ris->dual_source_blend_index; - var->data.precision = ris->medium_precision; - if (nir->info.stage == MESA_SHADER_MESH && ris->mode == nir_var_shader_out) - var->data.per_primitive = (nir->info.per_primitive_outputs & BITFIELD64_BIT(ris->location)) > 0; - else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in) - var->data.per_primitive = (nir->info.per_primitive_inputs & BITFIELD64_BIT(ris->location)) > 0; - /* only clip/cull dist and tess levels are compact */ - if (nir->info.stage != MESA_SHADER_VERTEX || ris->mode != nir_var_shader_in) - var->data.compact = is_clipcull_dist(ris->location) || is_tess_level; -} - -/* loop the i/o mask and generate variables for specified locations */ -static void -loop_io_var_mask(nir_shader *nir, nir_variable_mode mode, bool indirect, bool patch, uint64_t mask) -{ - ASSERTED bool is_vertex_input = nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in; - u_foreach_bit64(slot, mask) { - unsigned location = slot; - if (patch) - location += VARYING_SLOT_PATCH0; - - /* this should've been handled explicitly */ - assert(is_vertex_input || !is_clipcull_dist(location)); - - unsigned remaining = 0; - do { - /* scan the slot for usage */ - struct rework_io_state ris = scan_io_var_slot(nir, mode, location, indirect); - /* one of these must be true or things have gone very wrong */ - assert(indirect || ris.component_mask || find_rework_var(nir, &ris) || remaining); - /* release builds only */ - if (!ris.component_mask) - break; - - /* whatever reaches this point is either enough info to create a variable or an existing variable */ - if (!find_rework_var(nir, &ris)) - create_io_var(nir, &ris); - /* scanning may detect multiple potential variables per location at component offsets: process again */ - remaining = ris.ignored_component_mask; - } while (remaining); - } -} - -/* for a given mode, generate variables */ -static void -rework_io_vars(nir_shader *nir, nir_variable_mode mode, struct zink_shader *zs) -{ - assert(mode == nir_var_shader_out || mode == nir_var_shader_in); - assert(util_bitcount(mode) == 1); - bool found = false; - /* if no i/o, skip */ - if (mode == nir_var_shader_out) - found = nir->info.outputs_written || nir->info.outputs_read || nir->info.patch_outputs_written || nir->info.patch_outputs_read; - else - found = nir->info.inputs_read || nir->info.patch_inputs_read; - if (!found) - return; - - /* use local copies to enable incremental processing */ - uint64_t inputs_read = nir->info.inputs_read; - uint64_t inputs_read_indirectly = nir->info.inputs_read_indirectly; - uint64_t outputs_accessed = nir->info.outputs_written | nir->info.outputs_read; - uint64_t outputs_accessed_indirectly = nir->info.outputs_read_indirectly | - nir->info.outputs_written_indirectly; - - /* fragment outputs are special: handle separately */ - if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) { - assert(!outputs_accessed_indirectly); - u_foreach_bit64(slot, outputs_accessed) { - struct rework_io_state ris = { - .location = slot, - .mode = mode, - .stage = nir->info.stage, - }; - /* explicitly handle builtins */ - switch (slot) { - case FRAG_RESULT_DEPTH: - case FRAG_RESULT_STENCIL: - case FRAG_RESULT_SAMPLE_MASK: - ris.bit_size = 32; - ris.component_mask = 0x1; - ris.type = slot == FRAG_RESULT_DEPTH ? nir_type_float32 : nir_type_uint32; - create_io_var(nir, &ris); - outputs_accessed &= ~BITFIELD64_BIT(slot); - break; - default: - break; - } - } - /* the rest of the outputs can be generated normally */ - loop_io_var_mask(nir, mode, false, false, outputs_accessed); - return; - } - - /* vertex inputs are special: handle separately */ - if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) { - assert(!inputs_read_indirectly); - u_foreach_bit64(slot, inputs_read) { - /* explicitly handle builtins */ - if (slot != VERT_ATTRIB_POS && slot != VERT_ATTRIB_POINT_SIZE) - continue; - - uint32_t component_mask = slot == VERT_ATTRIB_POINT_SIZE ? 0x1 : 0xf; - struct rework_io_state ris = { - .location = slot, - .mode = mode, - .stage = nir->info.stage, - .bit_size = 32, - .component_mask = component_mask, - .type = nir_type_float32, - .newname = scan_io_var_slot(nir, nir_var_shader_in, slot, false).newname, - }; - create_io_var(nir, &ris); - inputs_read &= ~BITFIELD64_BIT(slot); - } - /* the rest of the inputs can be generated normally */ - loop_io_var_mask(nir, mode, false, false, inputs_read); - return; - } - - /* these are the masks to process based on the mode: nothing "special" as above */ - uint64_t mask = mode == nir_var_shader_in ? inputs_read : outputs_accessed; - uint64_t indirect_mask = mode == nir_var_shader_in ? inputs_read_indirectly : outputs_accessed_indirectly; - u_foreach_bit64(slot, mask) { - struct rework_io_state ris = { - .location = slot, - .mode = mode, - .stage = nir->info.stage, - .arrayed_io = (mode == nir_var_shader_in ? zs->arrayed_inputs : zs->arrayed_outputs) & BITFIELD64_BIT(slot), - }; - /* explicitly handle builtins */ - unsigned max_components = 0; - switch (slot) { - case VARYING_SLOT_FOGC: - /* use intr components */ - break; - case VARYING_SLOT_POS: - case VARYING_SLOT_CLIP_VERTEX: - case VARYING_SLOT_PNTC: - case VARYING_SLOT_BOUNDING_BOX0: - case VARYING_SLOT_BOUNDING_BOX1: - max_components = 4; - ris.type = nir_type_float32; - break; - case VARYING_SLOT_CLIP_DIST0: - max_components = nir->info.clip_distance_array_size; - assert(max_components); - ris.type = nir_type_float32; - break; - case VARYING_SLOT_CULL_DIST0: - max_components = nir->info.cull_distance_array_size; - assert(max_components); - ris.type = nir_type_float32; - break; - case VARYING_SLOT_CLIP_DIST1: - case VARYING_SLOT_CULL_DIST1: - mask &= ~BITFIELD64_BIT(slot); - indirect_mask &= ~BITFIELD64_BIT(slot); - continue; - case VARYING_SLOT_TESS_LEVEL_OUTER: - max_components = 4; - ris.type = nir_type_float32; - break; - case VARYING_SLOT_TESS_LEVEL_INNER: - max_components = 2; - ris.type = nir_type_float32; - break; - case VARYING_SLOT_PRIMITIVE_ID: - case VARYING_SLOT_LAYER: - case VARYING_SLOT_VIEWPORT: - case VARYING_SLOT_FACE: - case VARYING_SLOT_VIEW_INDEX: - case VARYING_SLOT_VIEWPORT_MASK: - ris.type = nir_type_int32; - max_components = 1; - break; - case VARYING_SLOT_PSIZ: - max_components = 1; - ris.type = nir_type_float32; - break; - default: - break; - } - if (!max_components) - continue; - switch (slot) { - case VARYING_SLOT_TESS_LEVEL_INNER: - /* actually VARYING_SLOT_PRIMITIVE_INDICES */ - if (nir->info.stage == MESA_SHADER_MESH) { - switch (nir->info.mesh.primitive_type) { - case MESA_PRIM_POINTS: - max_components = 1; - break; - case MESA_PRIM_LINES: - max_components = 2; - break; - default: - max_components = 3; - break; - } - ris.component_mask = BITFIELD_MASK(max_components); - ris.type = nir_type_int32; - break; - } - FALLTHROUGH; - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - case VARYING_SLOT_CULL_DIST0: - case VARYING_SLOT_CULL_DIST1: - case VARYING_SLOT_TESS_LEVEL_OUTER: - /* compact arrays */ - ris.component_mask = 0x1; - ris.array_size = max_components; - break; - default: - ris.component_mask = BITFIELD_MASK(max_components); - break; - } - ris.bit_size = 32; - create_io_var(nir, &ris); - mask &= ~BITFIELD64_BIT(slot); - /* eliminate clip/cull distance scanning early */ - indirect_mask &= ~BITFIELD64_BIT(slot); - } - - /* patch i/o */ - if ((nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out) || - (nir->info.stage == MESA_SHADER_TESS_EVAL && mode == nir_var_shader_in)) { - uint64_t patch_outputs_accessed = nir->info.patch_outputs_read | nir->info.patch_outputs_written; - uint64_t indirect_patch_mask = - mode == nir_var_shader_in ? nir->info.patch_inputs_read_indirectly - : (nir->info.patch_outputs_read_indirectly | - nir->info.patch_outputs_written_indirectly); - uint64_t patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read : patch_outputs_accessed; - - loop_io_var_mask(nir, mode, true, true, indirect_patch_mask); - loop_io_var_mask(nir, mode, false, true, patch_mask); - } - - /* regular i/o */ - loop_io_var_mask(nir, mode, true, false, indirect_mask); - loop_io_var_mask(nir, mode, false, false, mask); -} - static int zink_type_size(const struct glsl_type *type, bool bindless) { @@ -5831,274 +5324,6 @@ fix_vertex_input_locations(nir_shader *nir) return nir_shader_intrinsics_pass(nir, fix_vertex_input_locations_instr, nir_metadata_all, NULL); } -struct trivial_revectorize_state { - bool has_xfb; - uint32_t component_mask; - nir_intrinsic_instr *base; - nir_intrinsic_instr *next_emit_vertex; - nir_intrinsic_instr *merge[NIR_MAX_VEC_COMPONENTS]; - struct set *deletions; -}; - -/* always skip xfb; scalarized xfb is preferred */ -static bool -intr_has_xfb(nir_intrinsic_instr *intr) -{ - if (!nir_intrinsic_has_io_xfb(intr)) - return false; - for (unsigned i = 0; i < 2; i++) { - if (nir_intrinsic_io_xfb(intr).out[i].num_components || nir_intrinsic_io_xfb2(intr).out[i].num_components) { - return true; - } - } - return false; -} - -/* helper to avoid vectorizing i/o for different vertices */ -static nir_intrinsic_instr * -find_next_emit_vertex(nir_intrinsic_instr *intr) -{ - bool found = false; - nir_foreach_instr_safe(instr, intr->instr.block) { - if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr); - if (!found && test_intr != intr) - continue; - if (!found) { - assert(intr == test_intr); - found = true; - continue; - } - if (test_intr->intrinsic == nir_intrinsic_emit_vertex) - return test_intr; - } - } - return NULL; -} - -/* scan for vectorizable instrs on a given location */ -static bool -trivial_revectorize_intr_scan(nir_shader *nir, nir_intrinsic_instr *intr, struct trivial_revectorize_state *state) -{ - nir_intrinsic_instr *base = state->base; - - if (intr == base) - return false; - - if (intr->intrinsic != base->intrinsic) - return false; - - if (_mesa_set_search(state->deletions, intr)) - return false; - - bool is_load = false; - bool is_input = false; - bool is_interp = false; - filter_io_instr(intr, &is_load, &is_input, &is_interp); - - nir_io_semantics base_sem = nir_intrinsic_io_semantics(base); - nir_io_semantics test_sem = nir_intrinsic_io_semantics(intr); - nir_alu_type base_type = is_load ? nir_intrinsic_dest_type(base) : nir_intrinsic_src_type(base); - nir_alu_type test_type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr); - int c = nir_intrinsic_component(intr); - /* already detected */ - if (state->component_mask & BITFIELD_BIT(c)) - return false; - /* not a match */ - if (base_sem.location != test_sem.location || base_sem.num_slots != test_sem.num_slots || base_type != test_type) - return false; - /* only vectorize when all srcs match */ - for (unsigned i = !is_input; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++) { - if (!nir_srcs_equal(intr->src[i], base->src[i])) - return false; - } - /* never match xfb */ - state->has_xfb |= intr_has_xfb(intr); - if (state->has_xfb) - return false; - if (nir->info.stage == MESA_SHADER_GEOMETRY) { - /* only match same vertex */ - if (state->next_emit_vertex != find_next_emit_vertex(intr)) - return false; - } - uint32_t mask = is_load ? BITFIELD_RANGE(c, intr->num_components) : (nir_intrinsic_write_mask(intr) << c); - state->component_mask |= mask; - u_foreach_bit(component, mask) - state->merge[component] = intr; - - return true; -} - -static bool -trivial_revectorize_scan(struct nir_builder *b, nir_intrinsic_instr *intr, void *data) -{ - bool is_load = false; - bool is_input = false; - bool is_interp = false; - if (!filter_io_instr(intr, &is_load, &is_input, &is_interp)) - return false; - if (intr->num_components != 1) - return false; - nir_io_semantics sem = nir_intrinsic_io_semantics(intr); - if (!is_input || b->shader->info.stage != MESA_SHADER_VERTEX) { - /* always ignore compact arrays */ - switch (sem.location) { - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - case VARYING_SLOT_CULL_DIST0: - case VARYING_SLOT_CULL_DIST1: - case VARYING_SLOT_TESS_LEVEL_INNER: - case VARYING_SLOT_TESS_LEVEL_OUTER: - return false; - default: break; - } - } - /* always ignore to-be-deleted instrs */ - if (_mesa_set_search(data, intr)) - return false; - - /* never vectorize xfb */ - if (intr_has_xfb(intr)) - return false; - - int ic = nir_intrinsic_component(intr); - uint32_t mask = is_load ? BITFIELD_RANGE(ic, intr->num_components) : (nir_intrinsic_write_mask(intr) << ic); - /* already vectorized */ - if (util_bitcount(mask) == 4) - return false; - struct trivial_revectorize_state state = { - .component_mask = mask, - .base = intr, - /* avoid clobbering i/o for different vertices */ - .next_emit_vertex = b->shader->info.stage == MESA_SHADER_GEOMETRY ? find_next_emit_vertex(intr) : NULL, - .deletions = data, - }; - u_foreach_bit(bit, mask) - state.merge[bit] = intr; - bool progress = false; - nir_foreach_instr(instr, intr->instr.block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr); - /* no matching across vertex emission */ - if (test_intr->intrinsic == nir_intrinsic_emit_vertex) - break; - progress |= trivial_revectorize_intr_scan(b->shader, test_intr, &state); - } - if (!progress || state.has_xfb) - return false; - - /* verify nothing crazy happened */ - assert(state.component_mask); - for (unsigned i = 0; i < 4; i++) { - assert(!state.merge[i] || !intr_has_xfb(state.merge[i])); - } - - unsigned first_component = ffs(state.component_mask) - 1; - unsigned num_components = util_bitcount(state.component_mask); - unsigned num_contiguous = 0; - uint32_t contiguous_mask = 0; - for (unsigned i = 0; i < num_components; i++) { - unsigned c = i + first_component; - /* calc mask of contiguous components to vectorize */ - if (state.component_mask & BITFIELD_BIT(c)) { - num_contiguous++; - contiguous_mask |= BITFIELD_BIT(c); - } - /* on the first gap or the the last component, vectorize */ - if (!(state.component_mask & BITFIELD_BIT(c)) || i == num_components - 1) { - if (num_contiguous > 1) { - /* reindex to enable easy src/dest index comparison */ - nir_index_ssa_defs(nir_shader_get_entrypoint(b->shader)); - /* determine the first/last instr to use for the base (vectorized) load/store */ - unsigned first_c = ffs(contiguous_mask) - 1; - nir_intrinsic_instr *base = NULL; - unsigned test_idx = is_load ? UINT32_MAX : 0; - for (unsigned j = 0; j < num_contiguous; j++) { - unsigned merge_c = j + first_c; - nir_intrinsic_instr *merge_intr = state.merge[merge_c]; - /* avoid breaking ssa ordering by using: - * - first instr for vectorized load - * - last instr for vectorized store - * this guarantees all srcs have been seen - */ - if ((is_load && merge_intr->def.index < test_idx) || - (!is_load && merge_intr->src[0].ssa->index >= test_idx)) { - test_idx = is_load ? merge_intr->def.index : merge_intr->src[0].ssa->index; - base = merge_intr; - } - } - assert(base); - /* update instr components */ - nir_intrinsic_set_component(base, nir_intrinsic_component(state.merge[first_c])); - unsigned orig_components = base->num_components; - base->num_components = num_contiguous; - /* do rewrites after loads and before stores */ - b->cursor = is_load ? nir_after_instr(&base->instr) : nir_before_instr(&base->instr); - if (is_load) { - base->def.num_components = num_contiguous; - /* iterate the contiguous loaded components and rewrite merged dests */ - for (unsigned j = 0; j < num_contiguous; j++) { - unsigned merge_c = j + first_c; - nir_intrinsic_instr *merge_intr = state.merge[merge_c]; - /* detect if the merged instr loaded multiple components and use swizzle mask for rewrite */ - unsigned use_components = merge_intr == base ? orig_components : merge_intr->def.num_components; - nir_def *swiz = nir_channels(b, &base->def, BITFIELD_RANGE(j, use_components)); - nir_def_rewrite_uses_after_instr(&merge_intr->def, swiz, merge_intr == base ? nir_def_instr(swiz) : &merge_intr->instr); - j += use_components - 1; - } - } else { - nir_def *comp[NIR_MAX_VEC_COMPONENTS]; - /* generate swizzled vec of store components and rewrite store src */ - for (unsigned j = 0; j < num_contiguous; j++) { - unsigned merge_c = j + first_c; - nir_intrinsic_instr *merge_intr = state.merge[merge_c]; - /* detect if the merged instr stored multiple components and extract them for rewrite */ - unsigned use_components = merge_intr == base ? orig_components : merge_intr->num_components; - for (unsigned k = 0; k < use_components; k++) - comp[j + k] = nir_channel(b, merge_intr->src[0].ssa, k); - j += use_components - 1; - } - nir_def *val = nir_vec(b, comp, num_contiguous); - nir_src_rewrite(&base->src[0], val); - nir_intrinsic_set_write_mask(base, BITFIELD_MASK(num_contiguous)); - } - /* deleting instructions during a foreach explodes the compiler, so delete later */ - for (unsigned j = 0; j < num_contiguous; j++) { - unsigned merge_c = j + first_c; - nir_intrinsic_instr *merge_intr = state.merge[merge_c]; - if (merge_intr != base) - _mesa_set_add(data, &merge_intr->instr); - } - } - contiguous_mask = 0; - num_contiguous = 0; - } - } - - return true; -} - -/* attempt to revectorize scalar i/o, ignoring xfb and "hard stuff" */ -static bool -trivial_revectorize(nir_shader *nir) -{ - struct set deletions; - - if (nir->info.stage > MESA_SHADER_FRAGMENT) - return false; - - _mesa_set_init(&deletions, NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - bool progress = nir_shader_intrinsics_pass(nir, trivial_revectorize_scan, nir_metadata_control_flow, &deletions); - /* now it's safe to delete */ - set_foreach_remove(&deletions, entry) { - nir_instr *instr = (void*)entry->key; - nir_instr_remove(instr); - } - _mesa_set_fini(&deletions, NULL); - return progress; -} - static bool flatten_image_arrays_intr(struct nir_builder *b, nir_instr *instr, void *data) {