mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 09:30:13 +01:00
nir/validate: expand IO intrinsic validation with nir_io_semantics
There are many workarounds. v2: add more validation Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> (v1) Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38113>
This commit is contained in:
parent
390023f9fd
commit
2f6b4803ab
7 changed files with 162 additions and 37 deletions
|
|
@ -86,7 +86,8 @@ v3d_nir_store_output(nir_builder *b, int base, nir_def *offset,
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_store_output(b, chan, offset, .base = base, .write_mask = 0x1, .component = 0,
|
nir_store_output(b, chan, offset, .base = base, .write_mask = 0x1, .component = 0,
|
||||||
.src_type = nir_type_uint | chan->bit_size);
|
.src_type = nir_type_uint | chan->bit_size,
|
||||||
|
.io_semantics.no_validate = 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
|
|
||||||
|
|
@ -1995,7 +1995,8 @@ typedef struct nir_io_semantics {
|
||||||
unsigned no_sysval_output : 1; /* whether this system value output has no
|
unsigned no_sysval_output : 1; /* whether this system value output has no
|
||||||
effect due to current pipeline states */
|
effect due to current pipeline states */
|
||||||
unsigned interp_explicit_strict : 1; /* preserve original vertex order */
|
unsigned interp_explicit_strict : 1; /* preserve original vertex order */
|
||||||
unsigned _pad : 1;
|
/* Skip nir_validate of the intrinsic. Any new code that sets it will ba NAK'd. */
|
||||||
|
unsigned no_validate : 1;
|
||||||
} nir_io_semantics;
|
} nir_io_semantics;
|
||||||
|
|
||||||
/* Transform feedback info for 2 outputs. nir_intrinsic_store_output contains
|
/* Transform feedback info for 2 outputs. nir_intrinsic_store_output contains
|
||||||
|
|
|
||||||
|
|
@ -99,6 +99,10 @@ store_clipdist_output(nir_builder *b, nir_variable *out, int location, int locat
|
||||||
nir_io_semantics semantics = {
|
nir_io_semantics semantics = {
|
||||||
.location = location,
|
.location = location,
|
||||||
.num_slots = b->shader->options->compact_arrays ? num_slots : 1,
|
.num_slots = b->shader->options->compact_arrays ? num_slots : 1,
|
||||||
|
/* the offset src has a different definition for compact
|
||||||
|
* arrays, and is unfoldable (nir_validate requires that constant
|
||||||
|
* offsets are always 0) */
|
||||||
|
.no_validate = b->shader->options->compact_arrays,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (location == VARYING_SLOT_CLIP_DIST1 || location_offset)
|
if (location == VARYING_SLOT_CLIP_DIST1 || location_offset)
|
||||||
|
|
@ -120,20 +124,32 @@ static void
|
||||||
load_clipdist_input(nir_builder *b, nir_variable *in, int location_offset,
|
load_clipdist_input(nir_builder *b, nir_variable *in, int location_offset,
|
||||||
nir_def **val, bool use_load_interp)
|
nir_def **val, bool use_load_interp)
|
||||||
{
|
{
|
||||||
|
bool compact_arrays = b->shader->options->compact_arrays;
|
||||||
|
nir_def *offset = nir_imm_int(b, compact_arrays ? location_offset : 0);
|
||||||
|
unsigned const_offset = compact_arrays ? 0 : location_offset;
|
||||||
|
|
||||||
nir_def *load;
|
nir_def *load;
|
||||||
if (use_load_interp) {
|
if (use_load_interp) {
|
||||||
/* TODO: use sample when per-sample shading? */
|
/* TODO: use sample when per-sample shading? */
|
||||||
nir_def *barycentric = nir_load_barycentric(
|
nir_def *barycentric = nir_load_barycentric(
|
||||||
b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_NONE);
|
b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_NONE);
|
||||||
load = nir_load_interpolated_input(
|
load = nir_load_interpolated_input(
|
||||||
b, 4, 32, barycentric, nir_imm_int(b, location_offset),
|
b, 4, 32, barycentric, offset,
|
||||||
.base = in->data.driver_location,
|
.base = in->data.driver_location + const_offset,
|
||||||
.io_semantics.location = in->data.location);
|
.io_semantics.location = in->data.location + const_offset,
|
||||||
|
/* the offset src has a different definition for compact
|
||||||
|
* arrays, and is unfoldable (nir_validate requires that constant
|
||||||
|
* offsets are always 0) */
|
||||||
|
.io_semantics.no_validate = compact_arrays);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
load = nir_load_input(b, 4, 32, nir_imm_int(b, location_offset),
|
load = nir_load_input(b, 4, 32, offset,
|
||||||
.base = in->data.driver_location,
|
.base = in->data.driver_location + const_offset,
|
||||||
.io_semantics.location = in->data.location);
|
.io_semantics.location = in->data.location + const_offset,
|
||||||
|
/* the offset src has a different definition for compact
|
||||||
|
* arrays, and is unfoldable (nir_validate requires that constant
|
||||||
|
* offsets are always 0) */
|
||||||
|
.io_semantics.no_validate = compact_arrays);
|
||||||
}
|
}
|
||||||
|
|
||||||
val[0] = nir_channel(b, load, 0);
|
val[0] = nir_channel(b, load, 0);
|
||||||
|
|
|
||||||
|
|
@ -1530,6 +1530,9 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
||||||
fprintf(fp, ")");
|
fprintf(fp, ")");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (io.no_validate)
|
||||||
|
fprintf(fp, " no_validate");
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -778,9 +778,52 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nir_intrinsic_has_io_semantics(instr) &&
|
if (nir_intrinsic_has_io_semantics(instr) &&
|
||||||
!nir_intrinsic_infos[instr->intrinsic].has_dest) {
|
!nir_intrinsic_io_semantics(instr).no_validate) {
|
||||||
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
bool is_input = instr->intrinsic == nir_intrinsic_load_input ||
|
||||||
|
instr->intrinsic == nir_intrinsic_load_per_vertex_input ||
|
||||||
|
instr->intrinsic == nir_intrinsic_load_per_primitive_input ||
|
||||||
|
instr->intrinsic == nir_intrinsic_load_interpolated_input ||
|
||||||
|
instr->intrinsic == nir_intrinsic_load_input_vertex;
|
||||||
|
bool is_output = instr->intrinsic == nir_intrinsic_load_output ||
|
||||||
|
instr->intrinsic == nir_intrinsic_load_per_vertex_output ||
|
||||||
|
instr->intrinsic == nir_intrinsic_load_per_primitive_output ||
|
||||||
|
instr->intrinsic == nir_intrinsic_load_per_view_output ||
|
||||||
|
instr->intrinsic == nir_intrinsic_store_output ||
|
||||||
|
instr->intrinsic == nir_intrinsic_store_per_vertex_output ||
|
||||||
|
instr->intrinsic == nir_intrinsic_store_per_primitive_output ||
|
||||||
|
instr->intrinsic == nir_intrinsic_store_per_view_output;
|
||||||
|
/* Driver-specific intrinsics with IO semantics are not validated. */
|
||||||
|
bool is_core_intrinsic = is_input || is_output;
|
||||||
|
|
||||||
|
if (is_core_intrinsic) {
|
||||||
|
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||||
|
unsigned max_num_components;
|
||||||
|
|
||||||
|
/* Validate that a single intrinsic doesn't access past the maximum
|
||||||
|
* number of components per slot.
|
||||||
|
*/
|
||||||
|
/* TODO: remove nir_io_radv_intrinsic_component_workaround */
|
||||||
|
if (state->shader->options->io_options &
|
||||||
|
nir_io_radv_intrinsic_component_workaround && sem.high_16bits)
|
||||||
|
max_num_components = 8;
|
||||||
|
else
|
||||||
|
max_num_components = 4;
|
||||||
|
|
||||||
|
if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
|
||||||
|
/* Input and output loads shouldn't load from multiple slots at once. */
|
||||||
|
validate_assert(state, nir_intrinsic_component(instr) +
|
||||||
|
instr->def.num_components <= max_num_components);
|
||||||
|
} else {
|
||||||
|
/* Output stores shouldn't write to multiple slots at once. */
|
||||||
|
validate_assert(state, nir_intrinsic_component(instr) +
|
||||||
|
instr->num_components <= max_num_components);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_input) {
|
||||||
|
validate_assert(state, !sem.no_sysval_output);
|
||||||
|
validate_assert(state, !sem.no_varying);
|
||||||
|
validate_assert(state, !sem.gs_streams);
|
||||||
|
} else {
|
||||||
/* An output that has no effect shouldn't be present in the IR. */
|
/* An output that has no effect shouldn't be present in the IR. */
|
||||||
validate_assert(state,
|
validate_assert(state,
|
||||||
(nir_slot_is_sysval_output(sem.location, MESA_SHADER_NONE) &&
|
(nir_slot_is_sysval_output(sem.location, MESA_SHADER_NONE) &&
|
||||||
|
|
@ -792,22 +835,66 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
|
||||||
* that the output is only read by TCS and not TES.
|
* that the output is only read by TCS and not TES.
|
||||||
*/
|
*/
|
||||||
state->shader->info.stage == MESA_SHADER_TESS_CTRL);
|
state->shader->info.stage == MESA_SHADER_TESS_CTRL);
|
||||||
|
|
||||||
|
validate_assert(state,
|
||||||
|
!sem.gs_streams ||
|
||||||
|
state->shader->info.stage == MESA_SHADER_GEOMETRY);
|
||||||
|
}
|
||||||
|
|
||||||
validate_assert(state,
|
validate_assert(state,
|
||||||
(!sem.dual_source_blend_index &&
|
(!sem.dual_source_blend_index &&
|
||||||
!sem.fb_fetch_output &&
|
!sem.fb_fetch_output &&
|
||||||
!sem.fb_fetch_output_coherent) ||
|
!sem.fb_fetch_output_coherent) ||
|
||||||
state->shader->info.stage == MESA_SHADER_FRAGMENT);
|
(state->shader->info.stage == MESA_SHADER_FRAGMENT &&
|
||||||
validate_assert(state,
|
is_output));
|
||||||
!sem.gs_streams ||
|
|
||||||
state->shader->info.stage == MESA_SHADER_GEOMETRY);
|
|
||||||
validate_assert(state,
|
validate_assert(state,
|
||||||
!sem.high_dvec2 ||
|
!sem.high_dvec2 ||
|
||||||
(state->shader->info.stage == MESA_SHADER_VERTEX &&
|
(state->shader->info.stage == MESA_SHADER_VERTEX &&
|
||||||
instr->intrinsic == nir_intrinsic_load_input));
|
is_input));
|
||||||
|
|
||||||
validate_assert(state,
|
validate_assert(state,
|
||||||
!sem.interp_explicit_strict ||
|
!sem.interp_explicit_strict ||
|
||||||
(state->shader->info.stage == MESA_SHADER_FRAGMENT &&
|
(state->shader->info.stage == MESA_SHADER_FRAGMENT &&
|
||||||
instr->intrinsic == nir_intrinsic_load_input_vertex));
|
instr->intrinsic == nir_intrinsic_load_input_vertex));
|
||||||
|
|
||||||
|
/* Non-zero src offset with num_slots == 1 is disallowed. */
|
||||||
|
if (sem.num_slots == 1) {
|
||||||
|
nir_src *offset_src = nir_get_io_offset_src(instr);
|
||||||
|
|
||||||
|
/* TODO: nir_opt_loop produces phi 0, 0 for store_output as follows,
|
||||||
|
* which isn't incorrect, but also isn't useful:
|
||||||
|
*
|
||||||
|
* Before:
|
||||||
|
* %1 = load_const 0
|
||||||
|
* loop {
|
||||||
|
* if {
|
||||||
|
* } else {
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* store_output(, %1)
|
||||||
|
*
|
||||||
|
* After:
|
||||||
|
* %1 = load_const 0
|
||||||
|
* if {
|
||||||
|
* } else {
|
||||||
|
* loop {
|
||||||
|
* %38 = load_const 0
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
* %52 = phi %1, %38
|
||||||
|
* store_output(, %52)
|
||||||
|
*
|
||||||
|
* Test: KHR-GLES3.shaders.indexing.uniform_array.float_dynamic_loop_read_fragment
|
||||||
|
*
|
||||||
|
* So allow phis in offset_src with num_slots == 1 for now.
|
||||||
|
*/
|
||||||
|
validate_assert(state,
|
||||||
|
(nir_src_is_const(*offset_src) &&
|
||||||
|
nir_src_as_uint(*offset_src) == 0) ||
|
||||||
|
offset_src->ssa->parent_instr->type == nir_instr_type_phi);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nir_intrinsic_has_offset_shift(instr) &&
|
if (nir_intrinsic_has_offset_shift(instr) &&
|
||||||
|
|
|
||||||
|
|
@ -158,6 +158,12 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
||||||
|
|
||||||
nir_intrinsic_set_base(intrin, slot);
|
nir_intrinsic_set_base(intrin, slot);
|
||||||
nir_intrinsic_set_component(intrin, slot_component);
|
nir_intrinsic_set_component(intrin, slot_component);
|
||||||
|
|
||||||
|
/* The code above generates load_input with
|
||||||
|
* "component + num_component > 4", which is theoretically illegal.
|
||||||
|
*/
|
||||||
|
io.no_validate = 1;
|
||||||
|
nir_intrinsic_set_io_semantics(intrin, io);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -192,6 +198,7 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
||||||
vf_element_count++;
|
vf_element_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nir_validate_shader(nir, __func__);
|
||||||
return reg_offset;
|
return reg_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -381,6 +381,12 @@ remap_patch_urb_offsets_instr(nir_builder *b, nir_intrinsic_instr *intrin, void
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_src_rewrite(offset, total_offset);
|
nir_src_rewrite(offset, total_offset);
|
||||||
|
|
||||||
|
/* Putting an address into offset_src requires that NIR validation of
|
||||||
|
* IO intrinsics is disabled.
|
||||||
|
*/
|
||||||
|
io_sem.no_validate = 1;
|
||||||
|
nir_intrinsic_set_io_semantics(intrin, io_sem);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -426,7 +432,11 @@ lower_per_view_outputs(nir_builder *b,
|
||||||
nir_intrinsic_set_write_mask(new, nir_intrinsic_write_mask(intrin));
|
nir_intrinsic_set_write_mask(new, nir_intrinsic_write_mask(intrin));
|
||||||
nir_intrinsic_set_component(new, nir_intrinsic_component(intrin));
|
nir_intrinsic_set_component(new, nir_intrinsic_component(intrin));
|
||||||
nir_intrinsic_set_src_type(new, nir_intrinsic_src_type(intrin));
|
nir_intrinsic_set_src_type(new, nir_intrinsic_src_type(intrin));
|
||||||
nir_intrinsic_set_io_semantics(new, nir_intrinsic_io_semantics(intrin));
|
|
||||||
|
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
||||||
|
/* the meaning of the offset src is different for brw */
|
||||||
|
sem.no_validate = 1;
|
||||||
|
nir_intrinsic_set_io_semantics(new, sem);
|
||||||
|
|
||||||
if (intrin->intrinsic == nir_intrinsic_load_per_view_output)
|
if (intrin->intrinsic == nir_intrinsic_load_per_view_output)
|
||||||
nir_def_rewrite_uses(&intrin->def, &new->def);
|
nir_def_rewrite_uses(&intrin->def, &new->def);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue