diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index 73a6e8d2a73..f52bd3cbfaf 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -72,7 +72,8 @@ struct ntv_context { SpvId front_face_var, instance_id_var, vertex_id_var, primitive_id_var, invocation_id_var, // geometry sample_mask_type, sample_id_var, sample_pos_var, - tess_patch_vertices_in, tess_coord_var; // tess + tess_patch_vertices_in, tess_coord_var, // tess + push_const_var; }; static SpvId @@ -320,19 +321,28 @@ handle_handle_slot(struct ntv_context *ctx, struct nir_variable *var) return handle_slot(ctx, var->data.location); } -static void -emit_input(struct ntv_context *ctx, struct nir_variable *var) +static SpvId +input_var_init(struct ntv_context *ctx, struct nir_variable *var) { SpvId var_type = get_glsl_type(ctx, var->type); + SpvStorageClass sc = get_storage_class(var); + if (sc == SpvStorageClassPushConstant) + spirv_builder_emit_decoration(&ctx->builder, var_type, SpvDecorationBlock); SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassInput, - var_type); - SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, - SpvStorageClassInput); + sc, var_type); + SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, sc); if (var->name) spirv_builder_emit_name(&ctx->builder, var_id, var->name); + if (var->data.mode == nir_var_mem_push_const) + ctx->push_const_var = var_id; + return var_id; +} +static void +emit_input(struct ntv_context *ctx, struct nir_variable *var) +{ + SpvId var_id = input_var_init(ctx, var); unsigned slot = var->data.location; if (ctx->stage == MESA_SHADER_VERTEX) spirv_builder_emit_location(&ctx->builder, var_id, @@ -1768,6 +1778,81 @@ emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr) spirv_builder_emit_store(&ctx->builder, ptr, result); } +/* FIXME: this is currently VERY specific to injected TCS usage */ +static void +emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + unsigned bit_size = nir_dest_bit_size(intr->dest); + SpvId uint_type = get_uvec_type(ctx, 32, 1); + SpvId load_type = get_uvec_type(ctx, 32, 1); + + /* number of components being loaded */ + unsigned num_components = nir_dest_num_components(intr->dest); + /* we need to grab 2x32 to fill the 64bit value */ + if (bit_size == 64) + num_components *= 2; + SpvId constituents[num_components]; + SpvId result; + + /* destination type for the load */ + SpvId type = get_dest_uvec_type(ctx, &intr->dest); + /* an id of an array member in bytes */ + SpvId uint_size = emit_uint_const(ctx, 32, sizeof(uint32_t)); + SpvId one = emit_uint_const(ctx, 32, 1); + + /* we grab a single array member at a time, so it's a pointer to a uint */ + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPushConstant, + load_type); + + SpvId member = emit_uint_const(ctx, 32, 0); + /* this is the offset (in bytes) that we're accessing: + * it may be a const value or it may be dynamic in the shader + */ + SpvId offset = get_src(ctx, &intr->src[0]); + offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size); + /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type: + * index 0 is accessing 'base' + * index 1 is accessing 'base[index 1]' + * + */ + for (unsigned i = 0; i < num_components; i++) { + SpvId indices[2] = { member, offset }; + SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, + ctx->push_const_var, indices, + ARRAY_SIZE(indices)); + /* load a single value into the constituents array */ + constituents[i] = spirv_builder_emit_load(&ctx->builder, load_type, ptr); + /* increment to the next vec4 member index for the next load */ + offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one); + } + + /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values + * by creating uvec2 composites and bitcasting them to u64 values + */ + if (bit_size == 64) { + num_components /= 2; + type = get_uvec_type(ctx, 64, num_components); + SpvId u64_type = get_uvec_type(ctx, 64, 1); + for (unsigned i = 0; i < num_components; i++) { + constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2); + constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]); + } + } + /* if loading more than 1 value, reassemble the results into the desired type, + * otherwise just use the loaded result + */ + if (num_components > 1) { + result = spirv_builder_emit_composite_construct(&ctx->builder, + type, + constituents, + num_components); + } else + result = constituents[0]; + + store_dest(ctx, &intr->dest, result, nir_type_uint); +} + static SpvId create_builtin_var(struct ntv_context *ctx, SpvId var_type, SpvStorageClass storage_class, @@ -1882,6 +1967,10 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_store_deref(ctx, intr); break; + case nir_intrinsic_load_push_constant: + emit_load_push_const(ctx, intr); + break; + case nir_intrinsic_load_front_face: emit_load_front_face(ctx, intr); break; @@ -2660,6 +2749,9 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, ctx.so_outputs = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_u32, _mesa_key_u32_equal); + nir_foreach_variable_with_modes(var, s, nir_var_mem_push_const) + input_var_init(&ctx, var); + nir_foreach_shader_in_variable(var, s) emit_input(&ctx, var); diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 83722e09647..5e2516e6194 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -507,6 +507,9 @@ zink_shader_free(struct zink_context *ctx, struct zink_shader *shader) struct zink_gfx_program *prog = (void*)entry->key; _mesa_hash_table_remove_key(ctx->program_cache, prog->shaders); prog->shaders[pipe_shader_type_from_mesa(shader->nir->info.stage)] = NULL; + if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) + /* automatically destroy generated tcs shaders when tes is destroyed */ + zink_shader_free(ctx, shader->generated); zink_gfx_program_reference(screen, &prog, NULL); } _mesa_set_destroy(shader->programs, NULL); @@ -514,3 +517,135 @@ zink_shader_free(struct zink_context *ctx, struct zink_shader *shader) ralloc_free(shader->nir); FREE(shader); } + + +/* creating a passthrough tcs shader that's roughly: + +#version 150 +#extension GL_ARB_tessellation_shader : require + +in vec4 some_var[gl_MaxPatchVertices]; +out vec4 some_var_out; + +layout(push_constant) uniform tcsPushConstants { + layout(offset = 0) float TessLevelInner[2]; + layout(offset = 8) float TessLevelOuter[4]; +} u_tcsPushConstants; +layout(vertices = $vertices_per_patch) out; +void main() +{ + gl_TessLevelInner = u_tcsPushConstants.TessLevelInner; + gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter; + some_var_out = some_var[gl_InvocationID]; +} + +*/ +struct zink_shader * +zink_shader_tcs_create(struct zink_context *ctx, struct zink_shader *vs) +{ + unsigned vertices_per_patch = ctx->gfx_pipeline_state.vertices_per_patch; + struct zink_shader *ret = CALLOC_STRUCT(zink_shader); + ret->shader_id = 0; //special value for internal shaders + ret->programs = _mesa_pointer_set_create(NULL); + + nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &nir_options, NULL); + nir_function *fn = nir_function_create(nir, "main"); + fn->is_entrypoint = true; + nir_function_impl *impl = nir_function_impl_create(fn); + + nir_builder b; + nir_builder_init(&b, impl); + b.cursor = nir_before_block(nir_start_block(impl)); + + nir_intrinsic_instr *invocation_id = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_invocation_id); + nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest, 1, 32, "gl_InvocationID"); + nir_builder_instr_insert(&b, &invocation_id->instr); + + nir_foreach_shader_out_variable(var, vs->nir) { + const struct glsl_type *type = var->type; + const struct glsl_type *in_type = var->type; + const struct glsl_type *out_type = var->type; + char buf[1024]; + snprintf(buf, sizeof(buf), "%s_out", var->name); + in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0); + out_type = glsl_array_type(type, vertices_per_patch, 0); + + nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name); + nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf); + out->data.location = in->data.location = var->data.location; + out->data.location_frac = in->data.location_frac = var->data.location_frac; + + /* gl_in[] receives values from equivalent built-in output + variables written by the vertex shader (section 2.14.7). Each array + element of gl_in[] is a structure holding values for a specific vertex of + the input patch. The length of gl_in[] is equal to the + implementation-dependent maximum patch size (gl_MaxPatchVertices). + - ARB_tessellation_shader + */ + for (unsigned i = 0; i < vertices_per_patch; i++) { + /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */ + nir_if *start_block = nir_push_if(&b, nir_ieq(&b, &invocation_id->dest.ssa, nir_imm_int(&b, i))); + nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), &invocation_id->dest.ssa); + nir_ssa_def *load = nir_load_deref(&b, in_array_var); + nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i); + nir_store_deref(&b, out_array_var, load, 0xff); + nir_pop_if(&b, start_block); + } + } + nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner"); + gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER; + gl_TessLevelInner->data.patch = 1; + nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter"); + gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER; + gl_TessLevelOuter->data.patch = 1; + + /* hacks so we can size these right for now */ + struct glsl_struct_field *fields = ralloc_size(nir, 2 * sizeof(struct glsl_struct_field)); + fields[0].type = glsl_array_type(glsl_uint_type(), 2, 0); + fields[0].name = ralloc_asprintf(nir, "gl_TessLevelInner"); + fields[0].offset = 0; + fields[1].type = glsl_array_type(glsl_uint_type(), 4, 0); + fields[1].name = ralloc_asprintf(nir, "gl_TessLevelOuter"); + fields[1].offset = 8; + nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const, + glsl_struct_type(fields, 2, "struct", false), "pushconst"); + pushconst->data.location = VARYING_SLOT_VAR0; + + nir_intrinsic_instr *load_inner = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); + load_inner->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_intrinsic_set_base(load_inner, 0); + nir_intrinsic_set_range(load_inner, 8); + load_inner->num_components = 2; + nir_ssa_dest_init(&load_inner->instr, &load_inner->dest, 2, 32, "TessLevelInner"); + nir_builder_instr_insert(&b, &load_inner->instr); + + nir_intrinsic_instr *load_outer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant); + load_outer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8)); + nir_intrinsic_set_base(load_outer, 8); + nir_intrinsic_set_range(load_outer, 16); + load_outer->num_components = 4; + nir_ssa_dest_init(&load_outer->instr, &load_outer->dest, 4, 32, "TessLevelOuter"); + nir_builder_instr_insert(&b, &load_outer->instr); + + for (unsigned i = 0; i < 2; i++) { + nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i); + nir_store_deref(&b, store_idx, nir_channel(&b, &load_inner->dest.ssa, i), 0xff); + } + for (unsigned i = 0; i < 4; i++) { + nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i); + nir_store_deref(&b, store_idx, nir_channel(&b, &load_outer->dest.ssa, i), 0xff); + } + + nir->info.tess.tcs_vertices_out = vertices_per_patch; + nir_validate_shader(nir, "created"); + + NIR_PASS_V(nir, nir_lower_regs_to_ssa); + optimize_nir(nir); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); + NIR_PASS_V(nir, lower_discard_if); + NIR_PASS_V(nir, nir_convert_from_ssa, true); + + ret->nir = nir; + ret->is_generated = true; + return ret; +} diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index 47e6bcfcb0d..379595876e6 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -74,6 +74,10 @@ struct zink_shader { bool has_tess_shader; // vertex shaders need to know if a tesseval shader exists bool has_geometry_shader; // vertex shaders need to know if a geometry shader exists + union { + struct zink_shader *generated; // a generated shader that this shader "owns" + bool is_generated; // if this is a driver-created shader (e.g., tcs) + }; }; VkShaderModule @@ -87,4 +91,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, void zink_shader_free(struct zink_context *ctx, struct zink_shader *shader); +struct zink_shader * +zink_shader_tcs_create(struct zink_context *ctx, struct zink_shader *vs); + #endif diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h index 04a1a2fe852..bb038001d7e 100644 --- a/src/gallium/drivers/zink/zink_context.h +++ b/src/gallium/drivers/zink/zink_context.h @@ -128,8 +128,13 @@ struct zink_context { struct pipe_stencil_ref stencil_ref; - float default_inner_level[2]; - float default_outer_level[4]; + union { + struct { + float default_inner_level[2]; + float default_outer_level[4]; + }; + float tess_levels[6]; + }; struct list_head suspended_queries; struct list_head primitives_generated_queries; diff --git a/src/gallium/drivers/zink/zink_draw.c b/src/gallium/drivers/zink/zink_draw.c index 86c26dcff75..15281b478fb 100644 --- a/src/gallium/drivers/zink/zink_draw.c +++ b/src/gallium/drivers/zink/zink_draw.c @@ -241,7 +241,9 @@ zink_draw_vbo(struct pipe_context *pctx, util_primconvert_draw_vbo(ctx->primconvert, dinfo, &draws[0]); return; } - + if (ctx->gfx_pipeline_state.vertices_per_patch != dinfo->vertices_per_patch) + ctx->gfx_pipeline_state.hash = 0; + ctx->gfx_pipeline_state.vertices_per_patch = dinfo->vertices_per_patch; struct zink_gfx_program *gfx_program = get_gfx_program(ctx); if (!gfx_program) return; @@ -249,7 +251,6 @@ zink_draw_vbo(struct pipe_context *pctx, if (ctx->gfx_pipeline_state.primitive_restart != !!dinfo->primitive_restart) ctx->gfx_pipeline_state.hash = 0; ctx->gfx_pipeline_state.primitive_restart = !!dinfo->primitive_restart; - ctx->gfx_pipeline_state.vertices_per_patch = dinfo->vertices_per_patch; VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program, &ctx->gfx_pipeline_state, @@ -477,6 +478,11 @@ zink_draw_vbo(struct pipe_context *pctx, gfx_program->layout, 0, 1, &desc_set, 0, NULL); zink_bind_vertex_buffers(batch, ctx); + if (gfx_program->shaders[PIPE_SHADER_TESS_CTRL] && gfx_program->shaders[PIPE_SHADER_TESS_CTRL]->is_generated) + vkCmdPushConstants(batch->cmdbuf, gfx_program->layout, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + 0, sizeof(float) * 6, + &ctx->tess_levels[0]); + zink_query_update_gs_states(ctx); if (ctx->num_so_targets) { diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index 55a428e56d5..89ef837ba1b 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -151,6 +151,14 @@ create_pipeline_layout(VkDevice dev, VkDescriptorSetLayout dsl) plci.pSetLayouts = &dsl; plci.setLayoutCount = 1; + + VkPushConstantRange pcr = {}; + pcr.stageFlags = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + pcr.offset = 0; + pcr.size = sizeof(float) * 6; + plci.pushConstantRangeCount = 1; + plci.pPushConstantRanges = &pcr; + VkPipelineLayout layout; if (vkCreatePipelineLayout(dev, &plci, NULL, &layout) != VK_SUCCESS) { debug_printf("vkCreatePipelineLayout failed!\n"); @@ -179,6 +187,18 @@ shader_key_fs_gen(struct zink_context *ctx, struct zink_shader *zs, fs_key->samples = !!ctx->fb_state.samples; } +static void +shader_key_tcs_gen(struct zink_context *ctx, struct zink_shader *zs, + struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key) +{ + struct zink_tcs_key *tcs_key = &key->key.tcs; + key->size = sizeof(struct zink_tcs_key); + + tcs_key->shader_id = zs->shader_id; + tcs_key->vertices_per_patch = ctx->gfx_pipeline_state.vertices_per_patch; + tcs_key->vs_outputs_written = shaders[PIPE_SHADER_VERTEX]->nir->info.outputs_written; +} + static void shader_key_dummy_gen(struct zink_context *ctx, struct zink_shader *zs, struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key) @@ -195,7 +215,7 @@ typedef void (*zink_shader_key_gen)(struct zink_context *ctx, struct zink_shader static zink_shader_key_gen shader_key_vtbl[] = { [MESA_SHADER_VERTEX] = shader_key_dummy_gen, - [MESA_SHADER_TESS_CTRL] = shader_key_dummy_gen, + [MESA_SHADER_TESS_CTRL] = shader_key_tcs_gen, [MESA_SHADER_TESS_EVAL] = shader_key_dummy_gen, [MESA_SHADER_GEOMETRY] = shader_key_dummy_gen, [MESA_SHADER_FRAGMENT] = shader_key_fs_gen, @@ -298,6 +318,12 @@ update_shader_modules(struct zink_context *ctx, struct zink_shader *stages[ZINK_ unsigned type = u_bit_scan(&dirty_shader_stages); dirty[tgsi_processor_to_shader_stage(type)] = stages[type]; } + if (ctx->dirty_shader_stages & (1 << PIPE_SHADER_TESS_EVAL)) { + if (dirty[MESA_SHADER_TESS_EVAL] && !dirty[MESA_SHADER_TESS_CTRL]) { + dirty[MESA_SHADER_TESS_CTRL] = stages[PIPE_SHADER_TESS_CTRL] = zink_shader_tcs_create(ctx, stages[PIPE_SHADER_VERTEX]); + dirty[MESA_SHADER_TESS_EVAL]->generated = stages[PIPE_SHADER_TESS_CTRL]; + } + } for (int i = 0; i < ZINK_SHADER_COUNT; ++i) { enum pipe_shader_type type = pipe_shader_type_from_mesa(i); diff --git a/src/gallium/drivers/zink/zink_shader_keys.h b/src/gallium/drivers/zink/zink_shader_keys.h index 92836b6939b..962651f1e6d 100644 --- a/src/gallium/drivers/zink/zink_shader_keys.h +++ b/src/gallium/drivers/zink/zink_shader_keys.h @@ -32,6 +32,12 @@ struct zink_fs_key { bool samples; }; +struct zink_tcs_key { + unsigned shader_id; + unsigned vertices_per_patch; + uint64_t vs_outputs_written; +}; + /* a shader key is used for swapping out shader modules based on pipeline states, * e.g., if sampleCount changes, we must verify that the fs doesn't need a recompile * to account for GL ignoring gl_SampleMask in some cases when VK will not @@ -40,6 +46,7 @@ struct zink_fs_key { struct zink_shader_key { union { struct zink_fs_key fs; + struct zink_tcs_key tcs; } key; uint32_t size; };