microsoft/compiler: Add a fractional var mask for variable sorting

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28535>
commit c21355260b
parent 760effefc6
Author: Jesse Natalie
Date:   2024-04-02 14:48:07 -07:00
Committed by: Marge Bot

5 changed files with 50 additions and 28 deletions
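
Background for the change (not stated in the diff itself): the new "frac" masks are per-component bitsets over the generic varyings, indexed as (location - VARYING_SLOT_VAR0) * 4 + location_frac, i.e. one bit per 32-bit component of each VARYING_SLOT_VARn slot. This commit only adds the plumbing; every call site touched below still passes NULL, which keeps the old whole-slot behavior. A rough sketch of how a later caller might populate such a mask for a producer shader follows (helper name, 32-slot sizing, and includes are assumptions, not taken from this commit):

#include <string.h>
#include "nir.h"
#include "util/bitset.h"

/* Hypothetical helper: record which components of each generic varying slot
 * the producer writes.  Sized for 32 generic slots * 4 components; variables
 * spanning multiple slots (arrays, matrices) are ignored for brevity. */
static void
gather_frac_output_mask(nir_shader *producer,
                        BITSET_WORD frac_mask[BITSET_WORDS(32 * 4)])
{
   memset(frac_mask, 0, BITSET_WORDS(32 * 4) * sizeof(BITSET_WORD));

   nir_foreach_shader_out_variable(var, producer) {
      if (var->data.location < VARYING_SLOT_VAR0)
         continue;

      unsigned slot = var->data.location - VARYING_SLOT_VAR0;
      unsigned comps = glsl_get_vector_elements(glsl_without_array(var->type));

      /* Same bit indexing the passes below check with BITSET_TEST(). */
      for (unsigned c = 0; c < comps; c++)
         BITSET_SET(frac_mask, slot * 4 + var->data.location_frac + c);
   }
}

A consumer-side mask over the inputs it reads would be built the same way and passed as the new trailing argument of dxil_nir_kill_unused_outputs() and dxil_reassign_driver_locations().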


@@ -1154,15 +1154,15 @@ select_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_sele
    /* Remove not-written inputs, and re-sort */
    if (prev) {
       uint32_t prev_stage_patch_written = prev->initial->info.patch_outputs_written;
-      NIR_PASS_V(new_nir_variant, dxil_nir_kill_undefined_varyings, key.prev_varying_outputs, prev_stage_patch_written);
-      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in, key.prev_varying_outputs);
+      NIR_PASS_V(new_nir_variant, dxil_nir_kill_undefined_varyings, key.prev_varying_outputs, prev_stage_patch_written, NULL);
+      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in, key.prev_varying_outputs, NULL);
    }
 
    /* Remove not-read outputs and re-sort */
    if (next) {
       uint32_t next_stage_patch_read = next->initial->info.patch_inputs_read;
-      NIR_PASS_V(new_nir_variant, dxil_nir_kill_unused_outputs, key.next_varying_inputs, next_stage_patch_read);
-      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out, key.next_varying_inputs);
+      NIR_PASS_V(new_nir_variant, dxil_nir_kill_unused_outputs, key.next_varying_inputs, next_stage_patch_read, NULL);
+      dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out, key.next_varying_inputs, NULL);
    }
 
    nir_shader_gather_info(new_nir_variant, nir_shader_get_entrypoint(new_nir_variant));
@@ -1405,7 +1405,7 @@ d3d12_create_shader(struct d3d12_context *ctx,
    }
 
    if (nir->info.stage != MESA_SHADER_VERTEX) {
-      dxil_reassign_driver_locations(nir, nir_var_shader_in, 0);
+      dxil_reassign_driver_locations(nir, nir_var_shader_in, 0, NULL);
    } else {
       dxil_sort_by_driver_location(nir, nir_var_shader_in);
@@ -1417,7 +1417,7 @@ d3d12_create_shader(struct d3d12_context *ctx,
    }
 
    if (nir->info.stage != MESA_SHADER_FRAGMENT) {
-      dxil_reassign_driver_locations(nir, nir_var_shader_out, 0);
+      dxil_reassign_driver_locations(nir, nir_var_shader_out, 0, NULL);
    } else {
       NIR_PASS_V(nir, nir_lower_fragcoord_wtrans);
       NIR_PASS_V(nir, dxil_nir_lower_sample_pos);


@@ -285,10 +285,6 @@ d3d12_lower_point_sprite(nir_shader *shader,
    }
    state.num_point_coords = count;
 
-   if (count) {
-      dxil_reassign_driver_locations(shader, nir_var_shader_out,
-                                     next_inputs_read);
-   }
 
    nir_foreach_function_impl(impl, shader) {
       nir_builder builder = nir_builder_create(impl);


@@ -1583,7 +1583,8 @@ enum dxil_sysvalue_type {
 };
 
 static enum dxil_sysvalue_type
-nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)
+nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask,
+                              const BITSET_WORD *other_stage_frac_mask)
 {
    switch (var->data.location) {
    case VARYING_SLOT_FACE:
@@ -1605,6 +1606,10 @@ nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)
       if (var->data.location < VARYING_SLOT_PATCH0 &&
           !((1ull << var->data.location) & other_stage_mask))
          return DXIL_UNUSED_NO_SYSVALUE;
+      if (var->data.location_frac && other_stage_frac_mask &&
+          var->data.location >= VARYING_SLOT_VAR0 &&
+          !BITSET_TEST(other_stage_frac_mask, ((var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac)))
+         return DXIL_UNUSED_NO_SYSVALUE;
       return DXIL_NO_SYSVALUE;
    }
 }
@@ -1614,13 +1619,13 @@ nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)
  */
 void
 dxil_reassign_driver_locations(nir_shader* s, nir_variable_mode modes,
-                               uint64_t other_stage_mask)
+                               uint64_t other_stage_mask, const BITSET_WORD *other_stage_frac_mask)
 {
    nir_foreach_variable_with_modes_safe(var, s, modes) {
       /* We use the driver_location here to avoid introducing a new
        * struct or member variable here. The true, updated driver location
        * will be written below, after sorting */
-      var->data.driver_location = nir_var_to_dxil_sysvalue_type(var, other_stage_mask);
+      var->data.driver_location = nir_var_to_dxil_sysvalue_type(var, other_stage_mask, other_stage_frac_mask);
    }
 
    nir_sort_variables_with_modes(s, variable_location_cmp, modes);
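
The comment above describes the trick: the sysvalue class returned by nir_var_to_dxil_sysvalue_type() is stashed in driver_location, the variables are sorted, and the real driver locations are assigned afterwards. The actual comparator, variable_location_cmp, lives elsewhere in dxil_nir.c and is not part of this diff; conceptually the ordering amounts to something like the illustrative sketch below (not the real implementation):

/* Illustrative only: group variables by the stashed sysvalue class first,
 * then order by location and component, so unused/sysvalue varyings cluster
 * together and the rest can be packed densely when locations are rewritten. */
static int
example_location_cmp(const nir_variable *a, const nir_variable *b)
{
   if (a->data.driver_location != b->data.driver_location)  /* sysvalue class */
      return a->data.driver_location - b->data.driver_location;
   if (a->data.location != b->data.location)
      return a->data.location - b->data.location;
   return a->data.location_frac - b->data.location_frac;
}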
@@ -2805,6 +2810,7 @@ dxil_nir_lower_coherent_loads_and_stores(nir_shader *s)
 struct undefined_varying_masks {
    uint64_t io_mask;
    uint32_t patch_io_mask;
+   const BITSET_WORD *frac_io_mask;
 };
 
 static bool
@@ -2852,8 +2858,12 @@ kill_undefined_varyings(struct nir_builder *b,
                   var->data.location;
    uint64_t written = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ?
       masks->patch_io_mask : masks->io_mask;
-   if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & written)
+   if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & written) {
+      if (!masks->frac_io_mask || !var->data.location_frac ||
+          var->data.location < VARYING_SLOT_VAR0 ||
+          BITSET_TEST(masks->frac_io_mask, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac))
       return false;
+   }
 
    b->cursor = nir_after_instr(instr);
    /* Note: zero is used instead of undef, because optimization is not run here, but is
@@ -2871,9 +2881,14 @@ kill_undefined_varyings(struct nir_builder *b,
 }
 
 bool
-dxil_nir_kill_undefined_varyings(nir_shader *shader, uint64_t prev_stage_written_mask, uint32_t prev_stage_patch_written_mask)
+dxil_nir_kill_undefined_varyings(nir_shader *shader, uint64_t prev_stage_written_mask, uint32_t prev_stage_patch_written_mask,
+                                 const BITSET_WORD *prev_stage_frac_output_mask)
 {
-   struct undefined_varying_masks masks = { .io_mask = prev_stage_written_mask, .patch_io_mask = prev_stage_patch_written_mask };
+   struct undefined_varying_masks masks = {
+      .io_mask = prev_stage_written_mask,
+      .patch_io_mask = prev_stage_patch_written_mask,
+      .frac_io_mask = prev_stage_frac_output_mask
+   };
    bool progress = nir_shader_instructions_pass(shader,
                                                 kill_undefined_varyings,
                                                 nir_metadata_dominance |
@@ -2932,8 +2947,12 @@ kill_unused_outputs(struct nir_builder *b,
                  var->data.location;
    uint64_t read = var->data.patch && var->data.location >= VARYING_SLOT_PATCH0 ?
       masks->patch_io_mask : masks->io_mask;
-   if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & read)
+   if (BITFIELD64_RANGE(loc, glsl_varying_count(var->type)) & read) {
+      if (!masks->frac_io_mask || !var->data.location_frac ||
+          var->data.location < VARYING_SLOT_VAR0 ||
+          BITSET_TEST(masks->frac_io_mask, (var->data.location - VARYING_SLOT_VAR0) * 4 + var->data.location_frac))
       return false;
+   }
 
    if (intr->intrinsic == nir_intrinsic_load_deref) {
       b->cursor = nir_after_instr(&intr->instr);
@@ -2945,9 +2964,14 @@ kill_unused_outputs(struct nir_builder *b,
 }
 
 bool
-dxil_nir_kill_unused_outputs(nir_shader *shader, uint64_t next_stage_read_mask, uint32_t next_stage_patch_read_mask)
+dxil_nir_kill_unused_outputs(nir_shader *shader, uint64_t next_stage_read_mask, uint32_t next_stage_patch_read_mask,
+                             const BITSET_WORD *next_stage_frac_input_mask)
 {
-   struct undefined_varying_masks masks = { .io_mask = next_stage_read_mask, .patch_io_mask = next_stage_patch_read_mask };
+   struct undefined_varying_masks masks = {
+      .io_mask = next_stage_read_mask,
+      .patch_io_mask = next_stage_patch_read_mask,
+      .frac_io_mask = next_stage_frac_input_mask
+   };
    bool progress = nir_shader_instructions_pass(shader,
                                                 kill_unused_outputs,


@@ -67,7 +67,7 @@ dxil_sort_ps_outputs(nir_shader* s);
 void
 dxil_reassign_driver_locations(nir_shader* s, nir_variable_mode modes,
-                               uint64_t other_stage_mask);
+                               uint64_t other_stage_mask, const BITSET_WORD *other_stage_frac_mask);
 
 void dxil_nir_split_tess_ctrl(nir_shader *nir, nir_function **patch_const_func);
 bool dxil_nir_fixup_tess_level_for_domain(nir_shader *nir);
@@ -89,8 +89,10 @@ bool dxil_nir_analyze_io_dependencies(struct dxil_module *mod, nir_shader *s);
 bool dxil_nir_guess_image_formats(nir_shader *s);
 bool dxil_nir_lower_coherent_loads_and_stores(nir_shader *s);
-bool dxil_nir_kill_undefined_varyings(nir_shader *shader, uint64_t prev_stage_written_mask, uint32_t prev_stage_patch_written_mask);
-bool dxil_nir_kill_unused_outputs(nir_shader *shader, uint64_t next_stage_read_mask, uint32_t next_stage_patch_read_mask);
+bool dxil_nir_kill_undefined_varyings(nir_shader *shader, uint64_t prev_stage_written_mask,
+                                      uint32_t prev_stage_patch_written_mask, const BITSET_WORD *prev_stage_frac_output_mask);
+bool dxil_nir_kill_unused_outputs(nir_shader *shader, uint64_t next_stage_read_mask,
+                                  uint32_t next_stage_patch_read_mask, const BITSET_WORD *next_stage_frac_input_mask);
 
 #ifdef __cplusplus
 }
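
With the updated prototypes, NULL for the new trailing mask keeps the previous per-slot behavior, which is what every call site in this commit passes. Purely as an illustration of how the masks would be wired through once a caller has them (the function and parameter names are invented for this sketch; the call pattern mirrors dxil_spirv_nir_link below):

#include "nir.h"
#include "dxil_nir.h"

/* Hypothetical caller: prune undefined inputs on the consumer and unused
 * outputs on the producer, then re-sort driver locations on both sides,
 * feeding the per-component masks instead of NULL. */
static void
link_varyings_with_frac_masks(nir_shader *producer, nir_shader *consumer,
                              const BITSET_WORD *producer_frac_outputs,
                              const BITSET_WORD *consumer_frac_inputs)
{
   NIR_PASS_V(consumer, dxil_nir_kill_undefined_varyings,
              producer->info.outputs_written,
              producer->info.patch_outputs_written,
              producer_frac_outputs);
   NIR_PASS_V(producer, dxil_nir_kill_unused_outputs,
              consumer->info.inputs_read,
              consumer->info.patch_inputs_read,
              consumer_frac_inputs);

   dxil_reassign_driver_locations(consumer, nir_var_shader_in,
                                  producer->info.outputs_written,
                                  producer_frac_outputs);
   dxil_reassign_driver_locations(producer, nir_var_shader_out,
                                  consumer->info.inputs_read,
                                  consumer_frac_inputs);
}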


@@ -769,11 +769,11 @@ dxil_spirv_nir_link(nir_shader *nir, nir_shader *prev_stage_nir,
       }
    }
 
-   NIR_PASS_V(nir, dxil_nir_kill_undefined_varyings, prev_stage_nir->info.outputs_written, prev_stage_nir->info.patch_outputs_written);
-   NIR_PASS_V(prev_stage_nir, dxil_nir_kill_unused_outputs, nir->info.inputs_read, nir->info.patch_inputs_read);
+   NIR_PASS_V(nir, dxil_nir_kill_undefined_varyings, prev_stage_nir->info.outputs_written, prev_stage_nir->info.patch_outputs_written, NULL);
+   NIR_PASS_V(prev_stage_nir, dxil_nir_kill_unused_outputs, nir->info.inputs_read, nir->info.patch_inputs_read, NULL);
 
-   dxil_reassign_driver_locations(nir, nir_var_shader_in, prev_stage_nir->info.outputs_written);
-   dxil_reassign_driver_locations(prev_stage_nir, nir_var_shader_out, nir->info.inputs_read);
+   dxil_reassign_driver_locations(nir, nir_var_shader_in, prev_stage_nir->info.outputs_written, NULL);
+   dxil_reassign_driver_locations(prev_stage_nir, nir_var_shader_out, nir->info.inputs_read, NULL);
 
    if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
       assert(prev_stage_nir->info.stage == MESA_SHADER_TESS_CTRL);
@@ -1099,7 +1099,7 @@ dxil_spirv_nir_passes(nir_shader *nir,
        * assigned even if there's just a single vertex shader in the
        * pipeline. The real linking happens in dxil_spirv_nir_link().
        */
-      dxil_reassign_driver_locations(nir, nir_var_shader_out, 0);
+      dxil_reassign_driver_locations(nir, nir_var_shader_out, 0, NULL);
    }
if (nir->info.stage == MESA_SHADER_VERTEX) {
@@ -1111,7 +1111,7 @@ dxil_spirv_nir_passes(nir_shader *nir,
       dxil_sort_by_driver_location(nir, nir_var_shader_in);
    } else {
-      dxil_reassign_driver_locations(nir, nir_var_shader_in, 0);
+      dxil_reassign_driver_locations(nir, nir_var_shader_in, 0, NULL);
    }
 
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));