diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index 943f96fc646..b4c441bc4de 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -53,7 +53,6 @@ static nir_component_mask_t get_nir_write_mask(const nir_def &def); static void brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, const brw_builder &bld, nir_intrinsic_instr *instr); static brw_reg emit_samplepos_setup(nir_to_brw_state &ntb); static brw_reg emit_sampleid_setup(nir_to_brw_state &ntb); -static brw_reg emit_samplemaskin_setup(nir_to_brw_state &ntb); static void brw_from_nir_emit_impl(nir_to_brw_state &ntb, nir_function_impl *impl); static void brw_from_nir_emit_cf_list(nir_to_brw_state &ntb, exec_list *list); @@ -148,13 +147,6 @@ emit_system_values_block(nir_to_brw_state &ntb, nir_block *block) *reg = emit_sampleid_setup(ntb); break; - case nir_intrinsic_load_sample_mask_in: - assert(s.stage == MESA_SHADER_FRAGMENT); - reg = &ntb.system_values[SYSTEM_VALUE_SAMPLE_MASK_IN]; - if (reg->file == BAD_FILE) - *reg = emit_samplemaskin_setup(ntb); - break; - case nir_intrinsic_load_workgroup_id: if (mesa_shader_stage_is_mesh(s.stage)) UNREACHABLE("should be lowered by nir_lower_compute_system_values()."); @@ -3544,57 +3536,6 @@ emit_sampleid_setup(nir_to_brw_state &ntb) return sample_id; } -static brw_reg -emit_samplemaskin_setup(nir_to_brw_state &ntb) -{ - const intel_device_info *devinfo = ntb.devinfo; - const brw_builder &bld = ntb.bld; - brw_shader &s = ntb.s; - - assert(s.stage == MESA_SHADER_FRAGMENT); - struct brw_fs_prog_data *fs_prog_data = brw_fs_prog_data(s.prog_data); - - /* DG2 should support this, but Wa_22012766191 says there are issues - * with CPS 1x1 + MSAA + FS writing to oMask. 
- */ - assert(devinfo->verx10 >= 200 || - fs_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS); - - brw_reg coverage_mask = - brw_fetch_payload_reg(bld, s.fs_payload().sample_mask_in_reg, BRW_TYPE_UD); - - if (fs_prog_data->persample_dispatch == INTEL_NEVER) - return coverage_mask; - - /* gl_SampleMaskIn[] comes from two sources: the input coverage mask, - * and a mask representing which sample is being processed by the - * current shader invocation. - * - * From the OES_sample_variables specification: - * "When per-sample shading is active due to the use of a fragment input - * qualified by "sample" or due to the use of the gl_SampleID or - * gl_SamplePosition variables, only the bit for the current sample is - * set in gl_SampleMaskIn." - */ - const brw_builder abld = bld.annotate("compute gl_SampleMaskIn"); - - if (ntb.system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE) - ntb.system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(ntb); - - brw_reg enabled_mask = - abld.SHL(brw_imm_ud(1), ntb.system_values[SYSTEM_VALUE_SAMPLE_ID]); - brw_reg mask = abld.AND(enabled_mask, coverage_mask); - - if (fs_prog_data->persample_dispatch == INTEL_ALWAYS) - return mask; - - brw_check_dynamic_fs_config(abld, fs_prog_data, - INTEL_FS_CONFIG_PERSAMPLE_DISPATCH); - set_predicate(BRW_PREDICATE_NORMAL, abld.SEL(mask, mask, coverage_mask)); - - return mask; -} - static void emit_frag_shading_rate_setup(nir_to_brw_state &ntb, brw_reg result) { @@ -3761,7 +3702,6 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, break; case nir_intrinsic_load_helper_invocation: - case nir_intrinsic_load_sample_mask_in: case nir_intrinsic_load_sample_id: { gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); brw_reg val = ntb.system_values[sv]; diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index ae421a88cb8..54388b88bdf 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -1573,6 +1573,41 @@ 
brw_nir_lower_fs_config_intel(nir_shader *nir, nir_metadata_control_flow, &state); } +static bool /* nir_shader_intrinsics_pass callback: rewrites one load_sample_mask_in in terms of load_coverage_mask_intel and load_sample_id; returns true only when it rewrote intrin */ +lower_sample_mask_in_instr(nir_builder *b, + nir_intrinsic_instr *intrin, + void *data) /* data is never read here */ +{ + if (intrin->intrinsic != nir_intrinsic_load_sample_mask_in) + return false; /* every other intrinsic is left untouched */ + + b->cursor = nir_before_instr(&intrin->instr); /* build the replacement just ahead of the load */ + + nir_def *sample_mask_in_reg = nir_load_coverage_mask_intel(b); + + nir_def *sample_id = nir_load_sample_id(b); + nir_def *sample_mask_in_msaa = + nir_iand(b, + nir_ishl(b, nir_imm_int(b, 1), sample_id), + sample_mask_in_reg); /* (1 << sample_id) & coverage: keeps at most the current sample's bit */ + + nir_def *sample_mask_in = nir_bcsel( + b, + nir_test_fs_config_intel(b, 1, INTEL_FS_CONFIG_PERSAMPLE_DISPATCH), + sample_mask_in_msaa, sample_mask_in_reg); /* masked value when the PERSAMPLE_DISPATCH test holds, raw coverage mask otherwise */ + + nir_def_replace(&intrin->def, sample_mask_in); /* the computed value takes the place of the original load's result */ + + return true; +} + +static bool /* applies lower_sample_mask_in_instr across nir and returns what nir_shader_intrinsics_pass returns */ +brw_nir_lower_sample_mask_in(nir_shader *nir) +{ + return nir_shader_intrinsics_pass(nir, lower_sample_mask_in_instr, + nir_metadata_control_flow, NULL); +} + void brw_nir_lower_fs_inputs(nir_shader *nir, const struct intel_device_info *devinfo, @@ -1657,6 +1692,9 @@ brw_nir_lower_fs_inputs(nir_shader *nir, NULL); } + /* Do this after nir_lower_single_sampled */ + NIR_PASS(_, nir, brw_nir_lower_sample_mask_in); + if (devinfo->ver < 20) { NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_barycentric_at_offset, diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index d2a965b548d..f5ca76378d4 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -1243,7 +1243,7 @@ jay_emit_intrinsic(struct nir_to_jay_state *nj, nir_intrinsic_instr *intr) JAY_TYPE_U32; break; - case nir_intrinsic_load_sample_mask_in: { + case nir_intrinsic_load_coverage_mask_intel: { jay_def mask = jay_extract(nj->payload.u0, 15); if (nj->s->dispatch_width == 32) { diff --git a/src/intel/compiler/jay/jay_nir.c b/src/intel/compiler/jay/jay_nir.c index 74cfe7c8962..1241d4f9c5b 100644 --- a/src/intel/compiler/jay/jay_nir.c +++ 
b/src/intel/compiler/jay/jay_nir.c @@ -51,7 +51,7 @@ lower_helper_invocation(nir_builder *b, nir_intrinsic_instr *intr, void *_) /* TODO: Is this right for multisampling? */ b->cursor = nir_before_instr(&intr->instr); nir_def *active = - nir_inot(b, nir_inverse_ballot(b, nir_load_sample_mask_in(b))); + nir_inot(b, nir_inverse_ballot(b, nir_load_coverage_mask_intel(b))); nir_def_replace(&intr->def, active); return true;