diff --git a/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt b/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt
index bbf8562f9d7..0b90047a3fb 100644
--- a/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt
+++ b/src/gallium/drivers/zink/ci/zink-tu-a630-fails.txt
@@ -173,8 +173,6 @@ KHR-GL46.gpu_shader_fp64.builtin.trunc_dvec2,Fail
 KHR-GL46.gpu_shader_fp64.builtin.trunc_dvec3,Fail
 KHR-GL46.gpu_shader_fp64.builtin.trunc_dvec4,Fail
 KHR-GL46.gpu_shader_fp64.fp64.max_uniform_components,Fail
-KHR-GL46.gpu_shader_fp64.fp64.named_uniform_blocks,Crash
-KHR-GL46.gpu_shader_fp64.fp64.varyings,Crash
 KHR-GL46.limits.max_fragment_input_components,Fail
 KHR-GL46.packed_pixels.pbo_rectangle.r11f_g11f_b10f,Fail
 KHR-GL46.packed_pixels.pbo_rectangle.r16,Fail
@@ -235,19 +233,13 @@ KHR-GL46.texture_cube_map_array.color_depth_attachments,Fail
 KHR-GL46.texture_cube_map_array.image_op_geometry_sh,Fail
 KHR-GL46.texture_view.view_classes,Fail
 KHR-GL46.vertex_attrib_64bit.vao,Fail
-KHR-GL46.vertex_attrib_binding.advanced-bindingUpdate,Crash
 KHR-Single-GL46.arrays_of_arrays_gl.AtomicUsage,Fail
 KHR-Single-GL46.arrays_of_arrays_gl.SubroutineFunctionCalls2,Crash
 KHR-Single-GL46.enhanced_layouts.ssb_member_offset_and_align,Fail
 KHR-Single-GL46.enhanced_layouts.uniform_block_member_offset_and_align,Fail
-KHR-Single-GL46.enhanced_layouts.varying_array_components,Crash
-KHR-Single-GL46.enhanced_layouts.varying_array_locations,Crash
-KHR-Single-GL46.enhanced_layouts.varying_components,Crash
-KHR-Single-GL46.enhanced_layouts.varying_locations,Crash
 KHR-Single-GL46.enhanced_layouts.varying_structure_locations,Crash
 KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_block_member,Fail
 KHR-Single-GL46.enhanced_layouts.xfb_capture_struct,Fail
-KHR-Single-GL46.enhanced_layouts.xfb_explicit_location,Crash
 KHR-Single-GL46.enhanced_layouts.xfb_global_buffer,Fail
 KHR-Single-GL46.enhanced_layouts.xfb_override_qualifiers_with_api,Fail
 KHR-Single-GL46.enhanced_layouts.xfb_stride,Fail
@@ -259,10 +251,6 @@ dEQP-GLES3.functional.occlusion_query.scissor_depth_clear_stencil_clear,Fail
 dEQP-GLES3.functional.occlusion_query.scissor_stencil_clear,Fail
 dEQP-GLES3.functional.occlusion_query.stencil_clear,Fail
 dEQP-GLES3.functional.shaders.loops.for_uniform_iterations.unconditional_continue_fragment,Crash
-dEQP-GLES3.functional.transform_feedback.basic_types.interleaved.lines.highp_mat3x4,Fail
-dEQP-GLES3.functional.transform_feedback.basic_types.interleaved.lines.mediump_vec3,Fail
-dEQP-GLES3.functional.transform_feedback.basic_types.interleaved.points.mediump_vec4,Fail
-dEQP-GLES3.functional.transform_feedback.basic_types.separate.points.highp_vec3,Fail
 dEQP-GLES31.functional.blend_equation_advanced.barrier.colorburn,Fail
 dEQP-GLES31.functional.blend_equation_advanced.barrier.colordodge,Fail
 dEQP-GLES31.functional.blend_equation_advanced.barrier.darken,Fail
diff --git a/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt b/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt
index fef05d14ab3..fb03e671d01 100644
--- a/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt
+++ b/src/gallium/drivers/zink/ci/zink-tu-a630-skips.txt
@@ -1,2 +1,2 @@
-# #6490
-KHR-Single-GL46.arrays_of_arrays_gl.InteractionFunctionCalls2
+# takes forever, but passes
+KHR-GL46.texture_swizzle.smoke
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index 01f08899dfb..aeb2eff55e0 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -1339,6 +1339,323 @@ zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
    optimize_nir(nir);
 }
 
+/* all types that hit this function contain something that is 64bit */
+static const struct glsl_type *
+rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var)
+{
+   if (glsl_type_is_array(type)) {
+      const struct glsl_type *child = glsl_get_array_element(type);
+      unsigned elements = glsl_get_aoa_size(type);
+      unsigned stride = glsl_get_explicit_stride(type);
+      return glsl_array_type(rewrite_64bit_type(nir, child, var), elements, stride);
+   }
+   /* rewrite structs recursively */
+   if (glsl_type_is_struct_or_ifc(type)) {
+      unsigned nmembers = glsl_get_length(type);
+      struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
+      unsigned xfb_offset = 0;
+      for (unsigned i = 0; i < nmembers; i++) {
+         const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
+         fields[i] = *f;
+         xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
+         if (i < nmembers - 1 && xfb_offset % 8 &&
+             glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1))) {
+            var->data.is_xfb = true;
+         }
+         fields[i].type = rewrite_64bit_type(nir, f->type, var);
+      }
+      return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
+   }
+   if (!glsl_type_is_64bit(type))
+      return type;
+   enum glsl_base_type base_type;
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_UINT64:
+      base_type = GLSL_TYPE_UINT;
+      break;
+   case GLSL_TYPE_INT64:
+      base_type = GLSL_TYPE_INT;
+      break;
+   case GLSL_TYPE_DOUBLE:
+      base_type = GLSL_TYPE_FLOAT;
+      break;
+   default:
+      unreachable("unknown 64-bit vertex attribute format!");
+   }
+   if (glsl_type_is_scalar(type))
+      return glsl_vector_type(base_type, 2);
+   unsigned num_components;
+   if (glsl_type_is_matrix(type)) {
+      /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
+      unsigned vec_components = glsl_get_vector_elements(type);
+      if (vec_components == 3)
+         vec_components = 4;
+      num_components = vec_components * 2 * glsl_get_matrix_columns(type);
+   } else {
+      num_components = glsl_get_vector_elements(type) * 2;
+      if (num_components <= 4)
+         return glsl_vector_type(base_type, num_components);
+   }
+   /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
+   struct glsl_struct_field fields[8] = {0};
+   unsigned remaining = num_components;
+   unsigned nfields = 0;
+   for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
+      assert(i < ARRAY_SIZE(fields));
+      fields[i].name = "";
+      fields[i].offset = i * 16;
+      fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
+   }
+   char buf[64];
+   snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
+   return glsl_struct_type(fields, nfields, buf, true);
+}
+
+static const struct glsl_type *
+deref_is_matrix(nir_deref_instr *deref)
+{
+   if (glsl_type_is_matrix(deref->type))
+      return deref->type;
+   nir_deref_instr *parent = nir_deref_instr_parent(deref);
+   if (parent)
+      return deref_is_matrix(parent);
+   return NULL;
+}
+
+/* rewrite all input/output variables using 32bit types and load/stores */
+static bool
+lower_64bit_vars(nir_shader *shader)
+{
+   bool progress = false;
+   struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+   struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out) {
+      if (!glsl_type_contains_64bit(var->type))
+         continue;
+      var->type = rewrite_64bit_type(shader, var->type, var);
+      /* once type is rewritten, rewrite all loads and stores */
+      nir_foreach_function(function, shader) {
+         bool func_progress = false;
+         if (!function->impl)
+            continue;
+         nir_builder b;
+         nir_builder_init(&b, function->impl);
+         nir_foreach_block(block, function->impl) {
+            nir_foreach_instr_safe(instr, block) {
+               switch (instr->type) {
+               case nir_instr_type_deref: {
+                  nir_deref_instr *deref = nir_instr_as_deref(instr);
+                  if (!(deref->modes & (nir_var_shader_in | nir_var_shader_out)))
+                     continue;
+                  if (nir_deref_instr_get_variable(deref) != var)
+                     continue;
+
+                  /* matrix types are special: store the original deref type for later use */
+                  const struct glsl_type *matrix = deref_is_matrix(deref);
+                  nir_deref_instr *parent = nir_deref_instr_parent(deref);
+                  if (!matrix) {
+                     /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
+                     hash_table_foreach(derefs, he) {
+                        /* propagate parent matrix type to row deref */
+                        if (he->key == parent)
+                           matrix = he->data;
+                     }
+                  }
+                  if (matrix)
+                     _mesa_hash_table_insert(derefs, deref, (void*)matrix);
+                  if (deref->deref_type == nir_deref_type_var)
+                     deref->type = var->type;
+                  else
+                     deref->type = rewrite_64bit_type(shader, deref->type, var);
+               }
+               break;
+               case nir_instr_type_intrinsic: {
+                  nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+                  if (intr->intrinsic != nir_intrinsic_store_deref &&
+                      intr->intrinsic != nir_intrinsic_load_deref)
+                     break;
+                  if (nir_intrinsic_get_var(intr, 0) != var)
+                     break;
+                  if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
+                      (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
+                     break;
+                  b.cursor = nir_before_instr(instr);
+                  nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+                  unsigned num_components = intr->num_components * 2;
+                  nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
+                  /* this is the stored matrix type from the deref */
+                  struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
+                  const struct glsl_type *matrix = he ? he->data : NULL;
+                  func_progress = true;
+                  if (intr->intrinsic == nir_intrinsic_store_deref) {
+                     /* first, unpack the src data to 32bit vec2 components */
+                     for (unsigned i = 0; i < intr->num_components; i++) {
+                        nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
+                        comp[i * 2] = nir_channel(&b, ssa, 0);
+                        comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
+                     }
+                     unsigned wrmask = nir_intrinsic_write_mask(intr);
+                     unsigned mask = 0;
+                     /* expand writemask for doubled components */
+                     for (unsigned i = 0; i < intr->num_components; i++) {
+                        if (wrmask & BITFIELD_BIT(i))
+                           mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
+                     }
+                     if (matrix) {
+                        /* matrix types always come from array (row) derefs */
+                        assert(deref->deref_type == nir_deref_type_array);
+                        nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
+                        /* let optimization clean up consts later */
+                        nir_ssa_def *index = deref->arr.index.ssa;
+                        /* this might be an indirect array index:
+                         * - iterate over matrix columns
+                         * - add if blocks for each column
+                         * - perform the store in the block
+                         */
+                        for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
+                           nir_push_if(&b, nir_ieq_imm(&b, index, idx));
+                           unsigned vec_components = glsl_get_vector_elements(matrix);
+                           /* always clamp dvec3 to 4 components */
+                           if (vec_components == 3)
+                              vec_components = 4;
+                           unsigned start_component = idx * vec_components * 2;
+                           /* struct member */
+                           unsigned member = start_component / 4;
+                           /* number of components remaining */
+                           unsigned remaining = num_components;
+                           for (unsigned i = 0; i < num_components; member++) {
+                              if (!(mask & BITFIELD_BIT(i)))
+                                 continue;
+                              assert(member < glsl_get_length(var_deref->type));
+                              /* deref the rewritten struct to the appropriate vec4/vec2 */
+                              nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
+                              unsigned incr = MIN2(remaining, 4);
+                              /* assemble the write component vec */
+                              nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
+                              /* use the number of components being written as the writemask */
+                              if (glsl_get_vector_elements(strct->type) > val->num_components)
+                                 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
+                              nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
+                              remaining -= incr;
+                              i += incr;
+                           }
+                           nir_pop_if(&b, NULL);
+                        }
+                        _mesa_set_add(deletes, &deref->instr);
+                     } else if (num_components <= 4) {
+                        /* simple store case: just write out the components */
+                        nir_ssa_def *dest = nir_vec(&b, comp, num_components);
+                        nir_store_deref(&b, deref, dest, mask);
+                     } else {
+                        /* writing > 4 components: access the struct and write to the appropriate vec4 members */
+                        for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
+                           if (!(mask & BITFIELD_MASK(4)))
+                              continue;
+                           nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
+                           nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
+                           if (glsl_get_vector_elements(strct->type) > dest->num_components)
+                              dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
+                           nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
+                           mask >>= 4;
+                        }
+                     }
+                  } else {
+                     nir_ssa_def *dest = NULL;
+                     if (matrix) {
+                        /* matrix types always come from array (row) derefs */
+                        assert(deref->deref_type == nir_deref_type_array);
+                        nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
+                        /* let optimization clean up consts later */
+                        nir_ssa_def *index = deref->arr.index.ssa;
+                        /* this might be an indirect array index:
+                         * - iterate over matrix columns
+                         * - add if blocks for each column
+                         * - phi the loads using the array index
+                         */
+                        unsigned cols = glsl_get_matrix_columns(matrix);
+                        nir_ssa_def *dests[4];
+                        for (unsigned idx = 0; idx < cols; idx++) {
+                           /* don't add an if for the final row: this will be handled in the else */
+                           if (idx < cols - 1)
+                              nir_push_if(&b, nir_ieq_imm(&b, index, idx));
+                           unsigned vec_components = glsl_get_vector_elements(matrix);
+                           /* always clamp dvec3 to 4 components */
+                           if (vec_components == 3)
+                              vec_components = 4;
+                           unsigned start_component = idx * vec_components * 2;
+                           /* struct member */
+                           unsigned member = start_component / 4;
+                           /* number of components remaining */
+                           unsigned remaining = num_components;
+                           /* component index */
+                           unsigned comp_idx = 0;
+                           for (unsigned i = 0; i < num_components; member++) {
+                              assert(member < glsl_get_length(var_deref->type));
+                              nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
+                              nir_ssa_def *load = nir_load_deref(&b, strct);
+                              unsigned incr = MIN2(remaining, 4);
+                              /* repack the loads to 64bit */
+                              for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
+                                 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
+                              remaining -= incr;
+                              i += incr;
+                           }
+                           dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
+                           if (idx < cols - 1)
+                              nir_push_else(&b, NULL);
+                        }
+                        /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
+                        for (unsigned idx = cols - 1; idx >= 1; idx--) {
+                           nir_pop_if(&b, NULL);
+                           dest = nir_if_phi(&b, dests[idx - 1], dest);
+                        }
+                        _mesa_set_add(deletes, &deref->instr);
+                     } else if (num_components <= 4) {
+                        /* simple load case */
+                        nir_ssa_def *load = nir_load_deref(&b, deref);
+                        /* pack 32bit loads into 64bit: this will automagically get optimized out later */
+                        for (unsigned i = 0; i < intr->num_components; i++) {
+                           comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
+                        }
+                        dest = nir_vec(&b, comp, intr->num_components);
+                     } else {
+                        /* writing > 4 components: access the struct and load the appropriate vec4 members */
+                        for (unsigned i = 0; i < 2; i++, num_components -= 4) {
+                           nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
+                           nir_ssa_def *load = nir_load_deref(&b, strct);
+                           comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
+                           if (num_components > 2)
+                              comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
+                        }
+                        dest = nir_vec(&b, comp, intr->num_components);
+                     }
+                     nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
+                  }
+                  _mesa_set_add(deletes, instr);
+                  break;
+               }
+               break;
+               default: break;
+               }
+            }
+         }
+         if (func_progress)
+            nir_metadata_preserve(function->impl, nir_metadata_none);
+         /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
+         set_foreach_remove(deletes, he)
+            nir_instr_remove((void*)he->key);
+      }
+      progress = true;
+   }
+   ralloc_free(deletes);
+   ralloc_free(derefs);
+   if (progress) {
+      nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
+      nir_lower_phis_to_scalar(shader, false);
+   }
+   return progress;
+}
+
 static void
 zink_shader_dump(void *words, size_t size, const char *file)
 {
@@ -2395,6 +2712,9 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
    ret->bindless |= bindless_lowered;
 
+   if (!screen->info.feats.features.shaderInt64)
+      NIR_PASS_V(nir, lower_64bit_vars);
+
    ret->nir = nir;
    if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings)
       update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);
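
Note (illustrative sketch, not part of the patch): lower_64bit_vars rewrites 64bit shader i/o into 32bit-sized types and wraps the rewritten loads/stores in nir_pack_64_2x32 / nir_unpack_64_2x32. The standalone C program below mirrors that 2x32 split on the CPU side for a single double, purely to show the representation the pass relies on; the names value, lo, and hi are invented for this example.

/* Illustrative only: a 64bit value stored as two 32bit components ("store"),
 * then reassembled ("load"), matching the unpack/pack pair the pass emits. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

int
main(void)
{
   double value = 1.0 / 3.0;

   /* "store" side: unpack the 64bit value into two 32bit components */
   uint64_t bits;
   memcpy(&bits, &value, sizeof(bits));
   uint32_t lo = (uint32_t)(bits & 0xffffffffu);
   uint32_t hi = (uint32_t)(bits >> 32);

   /* "load" side: pack the two 32bit components back into a 64bit value */
   uint64_t repacked = ((uint64_t)hi << 32) | lo;
   double reloaded;
   memcpy(&reloaded, &repacked, sizeof(reloaded));

   assert(reloaded == value);
   return 0;
}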