mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 11:28:05 +02:00
microsoft/compiler, d3d12: preserve TCS outputs and pad TES inputs for cross-stage signature matching
Four linked D3D12 pipeline-validation problems with GLSL TCS on DXIL: 1) dxil_nir_kill_unused_outputs killed TCS outputs read back by the patch-constant function after a barrier, zeroing the tess factors. Keep shader_out locations with any intra-shader load_deref live regardless of next_stage_read_mask. 2) is_dead_in_variable dropped TES padding placeholders (no local uses) in nir_remove_dead_variables. Also honor prev_stage_written_mask so padded TES inputs stay alive. 3) Preserving (1) leaves HS with outputs the DS doesn't declare, breaking pipeline validation (e.g. piglit's barrier.shader_test). Add dxil_nir_pad_tes_input_signature, called from both link paths, to synthesize matching TES inputs (reusing each TCS output's type so sig shape and stride match byte-for-byte) plus the tess-level inputs -- subsuming the tess-level-only block previously in dxil_spirv_nir_link. Scope the per-variable padding to TCS outputs that TCS itself reads back via load_deref: outputs that neither TES nor TCS consumes get killed from the HS signature, so padding them into DS would make the DS input signature longer than HS output and break validation for SSO pipelines whose TCS declares unused per-patch writes (arb_separate_shader_objects/ mix-and-match-tcs-tes). 4) remove_hs_intrinsics rewrote load_output but not load_per_vertex_output in HS main. With (1) keeping outputs alive, GLSL reads of outputs in main whose result survives DCE (UAV atomics, non-tess per-vertex output writes) left LoadOutputControlPoint in the control-point function, which dxil.dll rejects outside the PCF (CreatePipelineState then fails with E_INVALIDARG). Treat load_per_vertex_output like load_output. Validated on piglit arb_tessellation_shader/execution (WARP + DXC 1.8.2403): barrier now passes; the previously-crashing tcs-output-unmatched and variable-indexing/tcs-output-array-* fail gracefully matching baseline; isoline/isoline-no-tcs remain flakes (pre-existing canary corruption, unrelated). d3d12-quick_shader.txt drops barrier; d3d12-flakes.txt adds isoline-no-tcs alongside isoline. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41028>
This commit is contained in:
parent
1d923fdd2b
commit
50ab52f135
7 changed files with 160 additions and 17 deletions
|
|
@ -1,5 +1,6 @@
|
|||
# Tessellation isoline tests are flaky on WARP, probably a WARP bug
|
||||
isoline
|
||||
isoline-no-tcs
|
||||
|
||||
# If the test machine are busy, these can spuriously fail
|
||||
arb_timer_query
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ spec@glsl-1.50@execution@variable-indexing@gs-output-array-vec4-index-wr,Crash
|
|||
|
||||
# These tests use a TCS output variable only as temporary storage. Since the output
|
||||
# is unused by the TES, we remove it.
|
||||
spec@arb_tessellation_shader@execution@barrier,Fail
|
||||
spec@arb_tessellation_shader@execution@barrier-patch,Crash
|
||||
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-float-index-wr-before-barrier,Fail
|
||||
spec@arb_tessellation_shader@execution@variable-indexing@tcs-output-array-vec2-index-rd-after-barrier,Fail
|
||||
|
|
|
|||
|
|
@ -1173,6 +1173,13 @@ select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_sele
|
|||
if (prev) {
|
||||
NIR_PASS(_, new_nir_variant, dxil_nir_kill_undefined_varyings, key.prev_varying_outputs,
|
||||
prev->initial->info.patch_outputs_written, key.prev_varying_frac_outputs);
|
||||
|
||||
/* Pad TES input signature so HS<->DS match for D3D12 pipeline validation. */
|
||||
if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL &&
|
||||
prev->initial->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
dxil_nir_pad_tes_input_signature(new_nir_variant, prev->initial);
|
||||
}
|
||||
|
||||
dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in, key.prev_varying_outputs,
|
||||
key.prev_varying_frac_outputs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2699,11 +2699,38 @@ struct undefined_varying_masks {
|
|||
uint64_t io_mask;
|
||||
uint32_t patch_io_mask;
|
||||
const BITSET_WORD *frac_io_mask;
|
||||
|
||||
/* Stage of the shader being processed; used to scope cross-stage
|
||||
* preservation heuristics to TCS<->TES linkage where they apply. */
|
||||
mesa_shader_stage stage;
|
||||
|
||||
/* For kill_unused_outputs: locations of shader_out variables that have at
|
||||
* least one load_deref use in the same shader (e.g. TCS patch-constant
|
||||
* function reading back per-vertex outputs). These must be kept alive even
|
||||
* if the next stage doesn't read them. */
|
||||
uint64_t intra_shader_load_mask;
|
||||
uint32_t intra_shader_patch_load_mask;
|
||||
};
|
||||
|
||||
static bool
|
||||
is_dead_in_variable(nir_variable *var, void *data)
|
||||
{
|
||||
const struct undefined_varying_masks *masks = data;
|
||||
|
||||
/* Only TES needs this: placeholder tess-level inputs (synthesized by
|
||||
* spirv_to_dxil for signature matching with HS) have no local uses but
|
||||
* must be preserved. Other stages don't synthesize such placeholders,
|
||||
* and it's perfectly valid for a prev stage to write outputs the next
|
||||
* stage doesn't read. */
|
||||
if (masks && masks->stage == MESA_SHADER_TESS_EVAL) {
|
||||
unsigned loc = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ?
|
||||
var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
|
||||
uint64_t written = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ?
|
||||
masks->patch_io_mask : masks->io_mask;
|
||||
if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & written)
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (var->data.location) {
|
||||
/* Only these values can be system generated values in addition to varyings */
|
||||
case VARYING_SLOT_PRIMITIVE_ID:
|
||||
|
|
@ -2774,7 +2801,8 @@ dxil_nir_kill_undefined_varyings(nir_shader *shader, uint64_t prev_stage_written
|
|||
struct undefined_varying_masks masks = {
|
||||
.io_mask = prev_stage_written_mask,
|
||||
.patch_io_mask = prev_stage_patch_written_mask,
|
||||
.frac_io_mask = prev_stage_frac_output_mask
|
||||
.frac_io_mask = prev_stage_frac_output_mask,
|
||||
.stage = shader->info.stage,
|
||||
};
|
||||
bool progress = nir_shader_instructions_pass(shader,
|
||||
kill_undefined_varyings,
|
||||
|
|
@ -2831,6 +2859,15 @@ kill_unused_outputs(struct nir_builder *b,
|
|||
unsigned loc = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ?
|
||||
var->data.location - VARYING_SLOT_PATCH0 :
|
||||
var->data.location;
|
||||
|
||||
/* Outputs read back by load_deref in the same shader (e.g. TCS patch-
|
||||
* constant function reading per-vertex outputs after a barrier) must
|
||||
* stay alive even if the next stage doesn't consume them. */
|
||||
uint64_t intra = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ?
|
||||
masks->intra_shader_patch_load_mask : masks->intra_shader_load_mask;
|
||||
if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & intra)
|
||||
return false;
|
||||
|
||||
uint64_t read = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ?
|
||||
masks->patch_io_mask : masks->io_mask;
|
||||
if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & read) {
|
||||
|
|
@ -2856,9 +2893,36 @@ dxil_nir_kill_unused_outputs(nir_shader *shader, uint64_t next_stage_read_mask,
|
|||
struct undefined_varying_masks masks = {
|
||||
.io_mask = next_stage_read_mask,
|
||||
.patch_io_mask = next_stage_patch_read_mask,
|
||||
.frac_io_mask = next_stage_frac_input_mask
|
||||
.frac_io_mask = next_stage_frac_input_mask,
|
||||
.stage = shader->info.stage,
|
||||
};
|
||||
|
||||
/* In TCS, the patch-constant function reads per-vertex outputs via
|
||||
* output[i].x after a barrier; such outputs must be preserved even if
|
||||
* the next stage doesn't consume them. load_deref on shader_out is only
|
||||
* valid in TCS, so scope the scan to that stage. */
|
||||
if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
nir_function_impl *entrypoint = nir_shader_get_entrypoint(shader);
|
||||
nir_foreach_block(block, entrypoint) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_load_deref)
|
||||
continue;
|
||||
nir_variable *var = nir_intrinsic_get_var(intr, 0);
|
||||
if (!var || var->data.mode != nir_var_shader_out)
|
||||
continue;
|
||||
if (var->data.patch && var->data.location >= VARYING_SLOT_PATCH0) {
|
||||
unsigned loc = var->data.location - VARYING_SLOT_PATCH0;
|
||||
masks.intra_shader_patch_load_mask |= BITFIELD_RANGE(loc, glsl_varying_count(var->type));
|
||||
} else {
|
||||
masks.intra_shader_load_mask |= BITFIELD64_RANGE(var->data.location, glsl_varying_count(var->type));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool progress = nir_shader_instructions_pass(shader,
|
||||
kill_unused_outputs,
|
||||
nir_metadata_control_flow |
|
||||
|
|
@ -2906,3 +2970,76 @@ dxil_nir_propagate_interp_to_outputs(nir_shader *prev_stage_nir,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Without this, an HS output that survives the kill pass (e.g. via the
|
||||
* intra-shader scratchpad pattern) but has no matching DS input makes
|
||||
* D3D12 CreatePipelineState fail with E_INVALIDARG. */
|
||||
bool
|
||||
dxil_nir_pad_tes_input_signature(nir_shader *tes, const nir_shader *tcs)
|
||||
{
|
||||
if (!tcs || !tes ||
|
||||
tes->info.stage != MESA_SHADER_TESS_EVAL ||
|
||||
tcs->info.stage != MESA_SHADER_TESS_CTRL)
|
||||
return false;
|
||||
|
||||
bool added = false;
|
||||
|
||||
/* Pad only for outputs that TCS reads back itself -- the ones
|
||||
* kill_unused_outputs keeps alive when TES doesn't read them. Padding
|
||||
* DS for anything else produces a longer input sig than HS output. */
|
||||
const uint64_t tcs_intra_read = tcs->info.outputs_read;
|
||||
const uint32_t tcs_intra_patch_read = tcs->info.patch_outputs_read;
|
||||
|
||||
nir_foreach_variable_with_modes(out_var, tcs, nir_var_shader_out) {
|
||||
unsigned loc = out_var->data.location;
|
||||
switch (loc) {
|
||||
case VARYING_SLOT_POS:
|
||||
case VARYING_SLOT_PSIZ:
|
||||
case VARYING_SLOT_PRIMITIVE_ID:
|
||||
case VARYING_SLOT_LAYER:
|
||||
case VARYING_SLOT_VIEWPORT:
|
||||
continue;
|
||||
}
|
||||
|
||||
bool intra_accessed;
|
||||
if (out_var->data.patch && loc >= VARYING_SLOT_PATCH0)
|
||||
intra_accessed = tcs_intra_patch_read & BITFIELD_BIT(loc - VARYING_SLOT_PATCH0);
|
||||
else
|
||||
intra_accessed = tcs_intra_read & BITFIELD64_BIT(loc);
|
||||
if (!intra_accessed)
|
||||
continue;
|
||||
|
||||
if (nir_find_variable_with_location(tes, nir_var_shader_in, loc))
|
||||
continue;
|
||||
|
||||
nir_variable *v = nir_variable_create(tes, nir_var_shader_in, out_var->type,
|
||||
out_var->name ? out_var->name : "tes_pad");
|
||||
v->data = out_var->data;
|
||||
v->data.mode = nir_var_shader_in;
|
||||
v->data.read_only = 1;
|
||||
added = true;
|
||||
}
|
||||
|
||||
/* Tess levels come from the fixed-function tessellator, not TCS code. */
|
||||
const struct {
|
||||
gl_varying_slot loc;
|
||||
const char *name;
|
||||
uint8_t arr_len;
|
||||
} tess_levels[] = {
|
||||
{ VARYING_SLOT_TESS_LEVEL_OUTER, "gl_TessLevelOuter", 4 },
|
||||
{ VARYING_SLOT_TESS_LEVEL_INNER, "gl_TessLevelInner", 2 },
|
||||
};
|
||||
for (unsigned k = 0; k < ARRAY_SIZE(tess_levels); ++k) {
|
||||
if (nir_find_variable_with_location(tes, nir_var_shader_in, tess_levels[k].loc))
|
||||
continue;
|
||||
const struct glsl_type *t = glsl_array_type(glsl_float_type(), tess_levels[k].arr_len, 0);
|
||||
nir_variable *v = nir_variable_create(tes, nir_var_shader_in, t, tess_levels[k].name);
|
||||
v->data.location = tess_levels[k].loc;
|
||||
v->data.patch = true;
|
||||
v->data.compact = true;
|
||||
v->data.always_active_io = 1;
|
||||
added = true;
|
||||
}
|
||||
|
||||
return added;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -92,6 +92,10 @@ bool dxil_nir_kill_unused_outputs(nir_shader *shader, uint64_t next_stage_read_m
|
|||
void dxil_nir_propagate_interp_to_outputs(nir_shader *prev_stage_nir,
|
||||
const nir_shader *fs_nir);
|
||||
|
||||
/* Synthesize placeholder TES inputs mirroring TCS outputs the consumer
|
||||
* doesn't already declare, so DXIL signature shapes match. */
|
||||
bool dxil_nir_pad_tes_input_signature(nir_shader *tes, const nir_shader *tcs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -48,7 +48,8 @@ remove_hs_intrinsics(nir_function_impl *impl)
|
|||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic == nir_intrinsic_load_output) {
|
||||
if (intr->intrinsic == nir_intrinsic_load_output ||
|
||||
intr->intrinsic == nir_intrinsic_load_per_vertex_output) {
|
||||
nir_builder b = nir_builder_at(nir_before_instr(&intr->instr));
|
||||
nir_def_rewrite_uses(&intr->def, nir_undef(&b, intr->def.num_components, intr->def.bit_size));
|
||||
} else if (intr->intrinsic != nir_intrinsic_store_output &&
|
||||
|
|
|
|||
|
|
@ -811,25 +811,19 @@ dxil_spirv_nir_link(nir_shader *nir, nir_shader *prev_stage_nir,
|
|||
NIR_PASS(_, nir, dxil_nir_kill_undefined_varyings, prev_stage_nir->info.outputs_written, prev_stage_nir->info.patch_outputs_written, NULL);
|
||||
NIR_PASS(_, prev_stage_nir, dxil_nir_kill_unused_outputs, nir->info.inputs_read, nir->info.patch_inputs_read, NULL);
|
||||
|
||||
dxil_reassign_driver_locations(nir, nir_var_shader_in, prev_stage_nir->info.outputs_written, NULL);
|
||||
dxil_reassign_driver_locations(prev_stage_nir, nir_var_shader_out, nir->info.inputs_read, NULL);
|
||||
|
||||
/* Tess metadata flows TCS->TES so the placeholder helper below knows
|
||||
* the per-vertex input array length, and TCS gets the domain config. */
|
||||
if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
assert(prev_stage_nir->info.stage == MESA_SHADER_TESS_CTRL);
|
||||
nir->info.tess.tcs_vertices_out = prev_stage_nir->info.tess.tcs_vertices_out;
|
||||
prev_stage_nir->info.tess = nir->info.tess;
|
||||
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
unsigned loc = i == 0 ? VARYING_SLOT_TESS_LEVEL_OUTER : VARYING_SLOT_TESS_LEVEL_INNER;
|
||||
nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_in, loc);
|
||||
if (!var) {
|
||||
var = nir_variable_create(nir, nir_var_shader_in, glsl_array_type(glsl_float_type(), i == 0 ? 4 : 2, 0), i == 0 ? "outer" : "inner");
|
||||
var->data.location = loc;
|
||||
var->data.patch = true;
|
||||
var->data.compact = true;
|
||||
}
|
||||
}
|
||||
/* Pad TES input signature so HS<->DS match for D3D12 pipeline validation. */
|
||||
dxil_nir_pad_tes_input_signature(nir, prev_stage_nir);
|
||||
}
|
||||
|
||||
dxil_reassign_driver_locations(nir, nir_var_shader_in, prev_stage_nir->info.outputs_written, NULL);
|
||||
dxil_reassign_driver_locations(prev_stage_nir, nir_var_shader_out, nir->info.inputs_read, NULL);
|
||||
}
|
||||
|
||||
glsl_type_singleton_decref();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue