diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c index f79e017e10a..f7a77838af1 100644 --- a/src/broadcom/compiler/v3d_nir_lower_io.c +++ b/src/broadcom/compiler/v3d_nir_lower_io.c @@ -86,7 +86,8 @@ v3d_nir_store_output(nir_builder *b, int base, nir_def *offset, } nir_store_output(b, chan, offset, .base = base, .write_mask = 0x1, .component = 0, - .src_type = nir_type_uint | chan->bit_size); + .src_type = nir_type_uint | chan->bit_size, + .io_semantics.no_validate = 1); } static int diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 26cb0921296..e327e23e0d3 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1995,7 +1995,8 @@ typedef struct nir_io_semantics { unsigned no_sysval_output : 1; /* whether this system value output has no effect due to current pipeline states */ unsigned interp_explicit_strict : 1; /* preserve original vertex order */ - unsigned _pad : 1; + /* Skip nir_validate of the intrinsic. Any new code that sets it will ba NAK'd. */ + unsigned no_validate : 1; } nir_io_semantics; /* Transform feedback info for 2 outputs. nir_intrinsic_store_output contains diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c index 44020680e93..7f1423c9e0d 100644 --- a/src/compiler/nir/nir_lower_clip.c +++ b/src/compiler/nir/nir_lower_clip.c @@ -99,6 +99,10 @@ store_clipdist_output(nir_builder *b, nir_variable *out, int location, int locat nir_io_semantics semantics = { .location = location, .num_slots = b->shader->options->compact_arrays ? num_slots : 1, + /* the offset src has a different definition for compact + * arrays, and is unfoldable (nir_validate requires that constant + * offsets are always 0) */ + .no_validate = b->shader->options->compact_arrays, }; if (location == VARYING_SLOT_CLIP_DIST1 || location_offset) @@ -120,20 +124,32 @@ static void load_clipdist_input(nir_builder *b, nir_variable *in, int location_offset, nir_def **val, bool use_load_interp) { + bool compact_arrays = b->shader->options->compact_arrays; + nir_def *offset = nir_imm_int(b, compact_arrays ? location_offset : 0); + unsigned const_offset = compact_arrays ? 0 : location_offset; + nir_def *load; if (use_load_interp) { /* TODO: use sample when per-sample shading? */ nir_def *barycentric = nir_load_barycentric( b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_NONE); load = nir_load_interpolated_input( - b, 4, 32, barycentric, nir_imm_int(b, location_offset), - .base = in->data.driver_location, - .io_semantics.location = in->data.location); + b, 4, 32, barycentric, offset, + .base = in->data.driver_location + const_offset, + .io_semantics.location = in->data.location + const_offset, + /* the offset src has a different definition for compact + * arrays, and is unfoldable (nir_validate requires that constant + * offsets are always 0) */ + .io_semantics.no_validate = compact_arrays); } else { - load = nir_load_input(b, 4, 32, nir_imm_int(b, location_offset), - .base = in->data.driver_location, - .io_semantics.location = in->data.location); + load = nir_load_input(b, 4, 32, offset, + .base = in->data.driver_location + const_offset, + .io_semantics.location = in->data.location + const_offset, + /* the offset src has a different definition for compact + * arrays, and is unfoldable (nir_validate requires that constant + * offsets are always 0) */ + .io_semantics.no_validate = compact_arrays); } val[0] = nir_channel(b, load, 0); diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index d5745a1f709..e3b9947b270 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -1530,6 +1530,9 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) fprintf(fp, ")"); } + if (io.no_validate) + fprintf(fp, " no_validate"); + break; } diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c index 5f40a83d3eb..012554ccc28 100644 --- a/src/compiler/nir/nir_validate.c +++ b/src/compiler/nir/nir_validate.c @@ -778,36 +778,123 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) } if (nir_intrinsic_has_io_semantics(instr) && - !nir_intrinsic_infos[instr->intrinsic].has_dest) { - nir_io_semantics sem = nir_intrinsic_io_semantics(instr); + !nir_intrinsic_io_semantics(instr).no_validate) { + bool is_input = instr->intrinsic == nir_intrinsic_load_input || + instr->intrinsic == nir_intrinsic_load_per_vertex_input || + instr->intrinsic == nir_intrinsic_load_per_primitive_input || + instr->intrinsic == nir_intrinsic_load_interpolated_input || + instr->intrinsic == nir_intrinsic_load_input_vertex; + bool is_output = instr->intrinsic == nir_intrinsic_load_output || + instr->intrinsic == nir_intrinsic_load_per_vertex_output || + instr->intrinsic == nir_intrinsic_load_per_primitive_output || + instr->intrinsic == nir_intrinsic_load_per_view_output || + instr->intrinsic == nir_intrinsic_store_output || + instr->intrinsic == nir_intrinsic_store_per_vertex_output || + instr->intrinsic == nir_intrinsic_store_per_primitive_output || + instr->intrinsic == nir_intrinsic_store_per_view_output; + /* Driver-specific intrinsics with IO semantics are not validated. */ + bool is_core_intrinsic = is_input || is_output; - /* An output that has no effect shouldn't be present in the IR. */ - validate_assert(state, - (nir_slot_is_sysval_output(sem.location, MESA_SHADER_NONE) && - !sem.no_sysval_output) || - (nir_slot_is_varying(sem.location, MESA_SHADER_NONE) && - !sem.no_varying) || - nir_instr_xfb_write_mask(instr) || - /* TCS can set no_varying and no_sysval_output, meaning - * that the output is only read by TCS and not TES. - */ - state->shader->info.stage == MESA_SHADER_TESS_CTRL); - validate_assert(state, - (!sem.dual_source_blend_index && - !sem.fb_fetch_output && - !sem.fb_fetch_output_coherent) || - state->shader->info.stage == MESA_SHADER_FRAGMENT); - validate_assert(state, - !sem.gs_streams || - state->shader->info.stage == MESA_SHADER_GEOMETRY); - validate_assert(state, - !sem.high_dvec2 || - (state->shader->info.stage == MESA_SHADER_VERTEX && - instr->intrinsic == nir_intrinsic_load_input)); - validate_assert(state, - !sem.interp_explicit_strict || + if (is_core_intrinsic) { + nir_io_semantics sem = nir_intrinsic_io_semantics(instr); + unsigned max_num_components; + + /* Validate that a single intrinsic doesn't access past the maximum + * number of components per slot. + */ + /* TODO: remove nir_io_radv_intrinsic_component_workaround */ + if (state->shader->options->io_options & + nir_io_radv_intrinsic_component_workaround && sem.high_16bits) + max_num_components = 8; + else + max_num_components = 4; + + if (nir_intrinsic_infos[instr->intrinsic].has_dest) { + /* Input and output loads shouldn't load from multiple slots at once. */ + validate_assert(state, nir_intrinsic_component(instr) + + instr->def.num_components <= max_num_components); + } else { + /* Output stores shouldn't write to multiple slots at once. */ + validate_assert(state, nir_intrinsic_component(instr) + + instr->num_components <= max_num_components); + } + + if (is_input) { + validate_assert(state, !sem.no_sysval_output); + validate_assert(state, !sem.no_varying); + validate_assert(state, !sem.gs_streams); + } else { + /* An output that has no effect shouldn't be present in the IR. */ + validate_assert(state, + (nir_slot_is_sysval_output(sem.location, MESA_SHADER_NONE) && + !sem.no_sysval_output) || + (nir_slot_is_varying(sem.location, MESA_SHADER_NONE) && + !sem.no_varying) || + nir_instr_xfb_write_mask(instr) || + /* TCS can set no_varying and no_sysval_output, meaning + * that the output is only read by TCS and not TES. + */ + state->shader->info.stage == MESA_SHADER_TESS_CTRL); + + validate_assert(state, + !sem.gs_streams || + state->shader->info.stage == MESA_SHADER_GEOMETRY); + } + + validate_assert(state, + (!sem.dual_source_blend_index && + !sem.fb_fetch_output && + !sem.fb_fetch_output_coherent) || (state->shader->info.stage == MESA_SHADER_FRAGMENT && - instr->intrinsic == nir_intrinsic_load_input_vertex)); + is_output)); + + validate_assert(state, + !sem.high_dvec2 || + (state->shader->info.stage == MESA_SHADER_VERTEX && + is_input)); + + validate_assert(state, + !sem.interp_explicit_strict || + (state->shader->info.stage == MESA_SHADER_FRAGMENT && + instr->intrinsic == nir_intrinsic_load_input_vertex)); + + /* Non-zero src offset with num_slots == 1 is disallowed. */ + if (sem.num_slots == 1) { + nir_src *offset_src = nir_get_io_offset_src(instr); + + /* TODO: nir_opt_loop produces phi 0, 0 for store_output as follows, + * which isn't incorrect, but also isn't useful: + * + * Before: + * %1 = load_const 0 + * loop { + * if { + * } else { + * } + * } + * store_output(, %1) + * + * After: + * %1 = load_const 0 + * if { + * } else { + * loop { + * %38 = load_const 0 + * } + * } + * %52 = phi %1, %38 + * store_output(, %52) + * + * Test: KHR-GLES3.shaders.indexing.uniform_array.float_dynamic_loop_read_fragment + * + * So allow phis in offset_src with num_slots == 1 for now. + */ + validate_assert(state, + (nir_src_is_const(*offset_src) && + nir_src_as_uint(*offset_src) == 0) || + offset_src->ssa->parent_instr->type == nir_instr_type_phi); + } + } } if (nir_intrinsic_has_offset_shift(instr) && diff --git a/src/intel/compiler/brw/brw_compile_vs.cpp b/src/intel/compiler/brw/brw_compile_vs.cpp index ab219d8a77a..e9bb184a4b5 100644 --- a/src/intel/compiler/brw/brw_compile_vs.cpp +++ b/src/intel/compiler/brw/brw_compile_vs.cpp @@ -158,6 +158,12 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data) nir_intrinsic_set_base(intrin, slot); nir_intrinsic_set_component(intrin, slot_component); + + /* The code above generates load_input with + * "component + num_component > 4", which is theoretically illegal. + */ + io.no_validate = 1; + nir_intrinsic_set_io_semantics(intrin, io); } } } @@ -192,6 +198,7 @@ brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data) vf_element_count++; } + nir_validate_shader(nir, __func__); return reg_offset; } diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index dc29f37621c..d4f7ae6f554 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -381,6 +381,12 @@ remap_patch_urb_offsets_instr(nir_builder *b, nir_intrinsic_instr *intrin, void } nir_src_rewrite(offset, total_offset); + + /* Putting an address into offset_src requires that NIR validation of + * IO intrinsics is disabled. + */ + io_sem.no_validate = 1; + nir_intrinsic_set_io_semantics(intrin, io_sem); } return true; @@ -426,7 +432,11 @@ lower_per_view_outputs(nir_builder *b, nir_intrinsic_set_write_mask(new, nir_intrinsic_write_mask(intrin)); nir_intrinsic_set_component(new, nir_intrinsic_component(intrin)); nir_intrinsic_set_src_type(new, nir_intrinsic_src_type(intrin)); - nir_intrinsic_set_io_semantics(new, nir_intrinsic_io_semantics(intrin)); + + nir_io_semantics sem = nir_intrinsic_io_semantics(intrin); + /* the meaning of the offset src is different for brw */ + sem.no_validate = 1; + nir_intrinsic_set_io_semantics(new, sem); if (intrin->intrinsic == nir_intrinsic_load_per_view_output) nir_def_rewrite_uses(&intrin->def, &new->def);