diff --git a/src/gallium/drivers/d3d12/ci/d3d12-flakes.txt b/src/gallium/drivers/d3d12/ci/d3d12-flakes.txt index 89eb9056d6c..3c142d409fd 100644 --- a/src/gallium/drivers/d3d12/ci/d3d12-flakes.txt +++ b/src/gallium/drivers/d3d12/ci/d3d12-flakes.txt @@ -1,5 +1,6 @@ # Tessellation isoline tests are flaky on WARP, probably a WARP bug isoline +isoline-no-tcs # If the test machine are busy, these can spuriously fail arb_timer_query diff --git a/src/gallium/drivers/d3d12/ci/d3d12-quick_shader.txt b/src/gallium/drivers/d3d12/ci/d3d12-quick_shader.txt index 5d1a3e63db8..927b7a253b8 100644 --- a/src/gallium/drivers/d3d12/ci/d3d12-quick_shader.txt +++ b/src/gallium/drivers/d3d12/ci/d3d12-quick_shader.txt @@ -15,7 +15,6 @@ spec@glsl-1.50@execution@variable-indexing@gs-output-array-vec4-index-wr,Crash # These tests use a TCS output variable only as temporary storage. Since the output # is unused by the TES, we remove it. -spec@arb_tessellation_shader@execution@barrier,Fail spec@arb_tessellation_shader@execution@barrier-patch,Crash spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-float-index-wr-before-barrier,Fail spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec2-index-rd-after-barrier,Fail diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.cpp b/src/gallium/drivers/d3d12/d3d12_compiler.cpp index cdefd9b6c14..6f066069518 100644 --- a/src/gallium/drivers/d3d12/d3d12_compiler.cpp +++ b/src/gallium/drivers/d3d12/d3d12_compiler.cpp @@ -1173,6 +1173,13 @@ select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_sele if (prev) { NIR_PASS(_, new_nir_variant, dxil_nir_kill_undefined_varyings, key.prev_varying_outputs, prev->initial->info.patch_outputs_written, key.prev_varying_frac_outputs); + + /* Pad TES input signature so HS<->DS match for D3D12 pipeline validation. */ + if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL && + prev->initial->info.stage == MESA_SHADER_TESS_CTRL) { + dxil_nir_pad_tes_input_signature(new_nir_variant, prev->initial); + } + dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in, key.prev_varying_outputs, key.prev_varying_frac_outputs); } diff --git a/src/microsoft/compiler/dxil_nir.c b/src/microsoft/compiler/dxil_nir.c index 44312ee2619..12c5a79c933 100644 --- a/src/microsoft/compiler/dxil_nir.c +++ b/src/microsoft/compiler/dxil_nir.c @@ -2699,11 +2699,38 @@ struct undefined_varying_masks { uint64_t io_mask; uint32_t patch_io_mask; const BITSET_WORD *frac_io_mask; + + /* Stage of the shader being processed; used to scope cross-stage + * preservation heuristics to TCS<->TES linkage where they apply. */ + mesa_shader_stage stage; + + /* For kill_unused_outputs: locations of shader_out variables that have at + * least one load_deref use in the same shader (e.g. TCS patch-constant + * function reading back per-vertex outputs). These must be kept alive even + * if the next stage doesn't read them. */ + uint64_t intra_shader_load_mask; + uint32_t intra_shader_patch_load_mask; }; static bool is_dead_in_variable(nir_variable *var, void *data) { + const struct undefined_varying_masks *masks = data; + + /* Only TES needs this: placeholder tess-level inputs (synthesized by + * spirv_to_dxil for signature matching with HS) have no local uses but + * must be preserved. Other stages don't synthesize such placeholders, + * and it's perfectly valid for a prev stage to write outputs the next + * stage doesn't read. */ + if (masks && masks->stage == MESA_SHADER_TESS_EVAL) { + unsigned loc = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ? + var->data.location - VARYING_SLOT_PATCH0 : var->data.location; + uint64_t written = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ? + masks->patch_io_mask : masks->io_mask; + if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & written) + return false; + } + switch (var->data.location) { /* Only these values can be system generated values in addition to varyings */ case VARYING_SLOT_PRIMITIVE_ID: @@ -2774,7 +2801,8 @@ dxil_nir_kill_undefined_varyings(nir_shader *shader, uint64_t prev_stage_written struct undefined_varying_masks masks = { .io_mask = prev_stage_written_mask, .patch_io_mask = prev_stage_patch_written_mask, - .frac_io_mask = prev_stage_frac_output_mask + .frac_io_mask = prev_stage_frac_output_mask, + .stage = shader->info.stage, }; bool progress = nir_shader_instructions_pass(shader, kill_undefined_varyings, @@ -2831,6 +2859,15 @@ kill_unused_outputs(struct nir_builder *b, unsigned loc = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ? var->data.location - VARYING_SLOT_PATCH0 : var->data.location; + + /* Outputs read back by load_deref in the same shader (e.g. TCS patch- + * constant function reading per-vertex outputs after a barrier) must + * stay alive even if the next stage doesn't consume them. */ + uint64_t intra = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ? + masks->intra_shader_patch_load_mask : masks->intra_shader_load_mask; + if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & intra) + return false; + uint64_t read = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ? masks->patch_io_mask : masks->io_mask; if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & read) { @@ -2856,9 +2893,36 @@ dxil_nir_kill_unused_outputs(nir_shader *shader, uint64_t next_stage_read_mask, struct undefined_varying_masks masks = { .io_mask = next_stage_read_mask, .patch_io_mask = next_stage_patch_read_mask, - .frac_io_mask = next_stage_frac_input_mask + .frac_io_mask = next_stage_frac_input_mask, + .stage = shader->info.stage, }; + /* In TCS, the patch-constant function reads per-vertex outputs via + * output[i].x after a barrier; such outputs must be preserved even if + * the next stage doesn't consume them. load_deref on shader_out is only + * valid in TCS, so scope the scan to that stage. */ + if (shader->info.stage == MESA_SHADER_TESS_CTRL) { + nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader); + nir_foreach_block(block, entrypoint) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_deref) + continue; + nir_variable *var = nir_intrinsic_get_var(intr, 0); + if (!var || var->data.mode != nir_var_shader_out) + continue; + if (var->data.patch && var->data.location >= VARYING_SLOT_PATCH0) { + unsigned loc = var->data.location - VARYING_SLOT_PATCH0; + masks.intra_shader_patch_load_mask |= BITFIELD_RANGE(loc, glsl_varying_count(var->type)); + } else { + masks.intra_shader_load_mask |= BITFIELD64_RANGE(var->data.location, glsl_varying_count(var->type)); + } + } + } + } + bool progress = nir_shader_instructions_pass(shader, kill_unused_outputs, nir_metadata_control_flow | @@ -2906,3 +2970,76 @@ dxil_nir_propagate_interp_to_outputs(nir_shader *prev_stage_nir, } } } + +/* Without this, an HS output that survives the kill pass (e.g. via the + * intra-shader scratchpad pattern) but has no matching DS input makes + * D3D12 CreatePipelineState fail with E_INVALIDARG. */ +bool +dxil_nir_pad_tes_input_signature(nir_shader *tes, const nir_shader *tcs) +{ + if (!tcs || !tes || + tes->info.stage != MESA_SHADER_TESS_EVAL || + tcs->info.stage != MESA_SHADER_TESS_CTRL) + return false; + + bool added = false; + + /* Pad only for outputs that TCS reads back itself -- the ones + * kill_unused_outputs keeps alive when TES doesn't read them. Padding + * DS for anything else produces a longer input sig than HS output. */ + const uint64_t tcs_intra_read = tcs->info.outputs_read; + const uint32_t tcs_intra_patch_read = tcs->info.patch_outputs_read; + + nir_foreach_variable_with_modes(out_var, tcs, nir_var_shader_out) { + unsigned loc = out_var->data.location; + switch (loc) { + case VARYING_SLOT_POS: + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_PRIMITIVE_ID: + case VARYING_SLOT_LAYER: + case VARYING_SLOT_VIEWPORT: + continue; + } + + bool intra_accessed; + if (out_var->data.patch && loc >= VARYING_SLOT_PATCH0) + intra_accessed = tcs_intra_patch_read & BITFIELD_BIT(loc - VARYING_SLOT_PATCH0); + else + intra_accessed = tcs_intra_read & BITFIELD64_BIT(loc); + if (!intra_accessed) + continue; + + if (nir_find_variable_with_location(tes, nir_var_shader_in, loc)) + continue; + + nir_variable *v = nir_variable_create(tes, nir_var_shader_in, out_var->type, + out_var->name ? out_var->name : "tes_pad"); + v->data = out_var->data; + v->data.mode = nir_var_shader_in; + v->data.read_only = 1; + added = true; + } + + /* Tess levels come from the fixed-function tessellator, not TCS code. */ + const struct { + gl_varying_slot loc; + const char *name; + uint8_t arr_len; + } tess_levels[] = { + { VARYING_SLOT_TESS_LEVEL_OUTER, "gl_TessLevelOuter", 4 }, + { VARYING_SLOT_TESS_LEVEL_INNER, "gl_TessLevelInner", 2 }, + }; + for (unsigned k = 0; k < ARRAY_SIZE(tess_levels); ++k) { + if (nir_find_variable_with_location(tes, nir_var_shader_in, tess_levels[k].loc)) + continue; + const struct glsl_type *t = glsl_array_type(glsl_float_type(), tess_levels[k].arr_len, 0); + nir_variable *v = nir_variable_create(tes, nir_var_shader_in, t, tess_levels[k].name); + v->data.location = tess_levels[k].loc; + v->data.patch = true; + v->data.compact = true; + v->data.always_active_io = 1; + added = true; + } + + return added; +} diff --git a/src/microsoft/compiler/dxil_nir.h b/src/microsoft/compiler/dxil_nir.h index 6f1e3292143..309c5d2cbd5 100644 --- a/src/microsoft/compiler/dxil_nir.h +++ b/src/microsoft/compiler/dxil_nir.h @@ -92,6 +92,10 @@ bool dxil_nir_kill_unused_outputs(nir_shader *shader, uint64_t next_stage_read_m void dxil_nir_propagate_interp_to_outputs(nir_shader *prev_stage_nir, const nir_shader *fs_nir); +/* Synthesize placeholder TES inputs mirroring TCS outputs the consumer + * doesn't already declare, so DXIL signature shapes match. */ +bool dxil_nir_pad_tes_input_signature(nir_shader *tes, const nir_shader *tcs); + #ifdef __cplusplus } #endif diff --git a/src/microsoft/compiler/dxil_nir_tess.c b/src/microsoft/compiler/dxil_nir_tess.c index 55f04223c22..61ac94d3e57 100644 --- a/src/microsoft/compiler/dxil_nir_tess.c +++ b/src/microsoft/compiler/dxil_nir_tess.c @@ -48,7 +48,8 @@ remove_hs_intrinsics(nir_function_impl *impl) if (instr->type != nir_instr_type_intrinsic) continue; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic == nir_intrinsic_load_output) { + if (intr->intrinsic == nir_intrinsic_load_output || + intr->intrinsic == nir_intrinsic_load_per_vertex_output) { nir_builder b = nir_builder_at(nir_before_instr(&intr->instr)); nir_def_rewrite_uses(&intr->def, nir_undef(&b, intr->def.num_components, intr->def.bit_size)); } else if (intr->intrinsic != nir_intrinsic_store_output && diff --git a/src/microsoft/spirv_to_dxil/dxil_spirv_nir.c b/src/microsoft/spirv_to_dxil/dxil_spirv_nir.c index 27974c108f7..4763c346a58 100644 --- a/src/microsoft/spirv_to_dxil/dxil_spirv_nir.c +++ b/src/microsoft/spirv_to_dxil/dxil_spirv_nir.c @@ -811,25 +811,19 @@ dxil_spirv_nir_link(nir_shader *nir, nir_shader *prev_stage_nir, NIR_PASS(_, nir, dxil_nir_kill_undefined_varyings, prev_stage_nir->info.outputs_written, prev_stage_nir->info.patch_outputs_written, NULL); NIR_PASS(_, prev_stage_nir, dxil_nir_kill_unused_outputs, nir->info.inputs_read, nir->info.patch_inputs_read, NULL); - dxil_reassign_driver_locations(nir, nir_var_shader_in, prev_stage_nir->info.outputs_written, NULL); - dxil_reassign_driver_locations(prev_stage_nir, nir_var_shader_out, nir->info.inputs_read, NULL); - + /* Tess metadata flows TCS->TES so the placeholder helper below knows + * the per-vertex input array length, and TCS gets the domain config. */ if (nir->info.stage == MESA_SHADER_TESS_EVAL) { assert(prev_stage_nir->info.stage == MESA_SHADER_TESS_CTRL); nir->info.tess.tcs_vertices_out = prev_stage_nir->info.tess.tcs_vertices_out; prev_stage_nir->info.tess = nir->info.tess; - for (uint32_t i = 0; i < 2; ++i) { - unsigned loc = i == 0 ? VARYING_SLOT_TESS_LEVEL_OUTER : VARYING_SLOT_TESS_LEVEL_INNER; - nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_in, loc); - if (!var) { - var = nir_variable_create(nir, nir_var_shader_in, glsl_array_type(glsl_float_type(), i == 0 ? 4 : 2, 0), i == 0 ? "outer" : "inner"); - var->data.location = loc; - var->data.patch = true; - var->data.compact = true; - } - } + /* Pad TES input signature so HS<->DS match for D3D12 pipeline validation. */ + dxil_nir_pad_tes_input_signature(nir, prev_stage_nir); } + + dxil_reassign_driver_locations(nir, nir_var_shader_in, prev_stage_nir->info.outputs_written, NULL); + dxil_reassign_driver_locations(prev_stage_nir, nir_var_shader_out, nir->info.inputs_read, NULL); } glsl_type_singleton_decref();