diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c
index 9ee3d805576..6d4c64db4b3 100644
--- a/src/compiler/nir/nir_divergence_analysis.c
+++ b/src/compiler/nir/nir_divergence_analysis.c
@@ -366,6 +366,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_alpha_to_coverage_enable_ir3:
    case nir_intrinsic_load_frag_shading_rate_intel:
    case nir_intrinsic_load_msaa_rate_intel:
+   case nir_intrinsic_test_fs_config_intel:
       is_divergent = false;
       break;
 
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 653442fe0e5..d18797a93b3 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -2807,9 +2807,13 @@ load("push_data_intel", [1], [BASE, RANGE, ACCESS], [CAN_ELIMINATE, CAN_REORDER]
 # Dynamic tesselation parameters (see intel_tess_config).
 system_value("tess_config_intel", 1)
 
-# Dynamic fragment shader parameters (see intel_fs_config) .
+# Dynamic fragment shader parameters (see intel_fs_config).
 system_value("fs_config_intel", 1)
 
+# Test a bit in fs_config_intel.
+intrinsic("test_fs_config_intel", dest_comp=1, src_comp=[],
+          indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
+
 # The (linear) local invocation index provided in the payload of mesh/task shaders.
 system_value("local_invocation_index_intel", 1)
 
diff --git a/src/intel/compiler/brw/brw_compile_fs.cpp b/src/intel/compiler/brw/brw_compile_fs.cpp
index da8dd583277..294421bea2d 100644
--- a/src/intel/compiler/brw/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw/brw_compile_fs.cpp
@@ -1527,31 +1527,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
    if (prog_data->coarse_pixel_dispatch != INTEL_NEVER)
       BRW_NIR_PASS(brw_nir_lower_frag_coord_z, devinfo);
 
-   if (!brw_fs_prog_key_is_dynamic(key)) {
-      uint32_t f = 0;
-
-      if (key->multisample_fbo == INTEL_ALWAYS)
-         f |= INTEL_FS_CONFIG_MULTISAMPLE_FBO;
-
-      if (key->alpha_to_coverage == INTEL_ALWAYS)
-         f |= INTEL_FS_CONFIG_ALPHA_TO_COVERAGE;
-
-      if (key->provoking_vertex_last == INTEL_ALWAYS)
-         f |= INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST;
-
-      if (key->persample_interp == INTEL_ALWAYS) {
-         f |= INTEL_FS_CONFIG_PERSAMPLE_DISPATCH |
-              INTEL_FS_CONFIG_PERSAMPLE_INTERP;
-      }
-
-      if (prog_data->coarse_pixel_dispatch == INTEL_ALWAYS)
-         f |= INTEL_FS_CONFIG_COARSE_RT_WRITES;
-
-      if (key->conservative_raster == INTEL_ALWAYS)
-         f |= INTEL_FS_CONFIG_CONSERVATIVE_RASTER;
-
-      BRW_NIR_PASS(nir_inline_sysval, nir_intrinsic_load_fs_config_intel, f);
-   }
+   BRW_NIR_PASS(brw_nir_lower_fs_config_intel, key, prog_data);
 
    brw_postprocess_nir_opts(pt);
 
diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp
index 7a2cd2b4c57..943f96fc646 100644
--- a/src/intel/compiler/brw/brw_from_nir.cpp
+++ b/src/intel/compiler/brw/brw_from_nir.cpp
@@ -3599,24 +3599,10 @@ static void
 emit_frag_shading_rate_setup(nir_to_brw_state &ntb, brw_reg result)
 {
    const intel_device_info *devinfo = ntb.devinfo;
-   const brw_builder &bld = ntb.bld;
-
-   struct brw_fs_prog_data *fs_prog_data =
-      brw_fs_prog_data(bld.shader->prog_data);
-
-   const brw_builder abld = bld.annotate("compute fragment size");
+   const brw_builder &abld = ntb.bld.annotate("compute fragment size");
 
    result.type = BRW_TYPE_UD;
 
-   bld.MOV(offset(result, bld, 0), brw_imm_ud(1));
-   bld.MOV(offset(result, bld, 1), brw_imm_ud(1));
-
-   /* Coarse pixel shading size fields overlap with other fields of not in
-    * coarse pixel dispatch mode, so report (1, 1) when that's not the case.
-    */
-   if (fs_prog_data->coarse_pixel_dispatch == INTEL_NEVER)
-      return;
-
    assert(devinfo->ver >= 11);
 
    /* r1.0 - 0:7 ActualCoarsePixelShadingSize.X */
@@ -3624,25 +3610,8 @@ emit_frag_shading_rate_setup(nir_to_brw_state &ntb, brw_reg result)
    /* r1.0 - 15:8 ActualCoarsePixelShadingSize.Y */
    brw_reg actual_y = byte_offset(actual_x, 1);
 
-   brw_reg coarse_size = abld.vgrf(BRW_TYPE_UD, 2);
-
-   bld.MOV(offset(coarse_size, bld, 0), actual_x);
-   bld.MOV(offset(coarse_size, bld, 1), actual_y);
-
-   if (fs_prog_data->coarse_pixel_dispatch == INTEL_ALWAYS) {
-      for (unsigned i = 0; i < 2; i++)
-         bld.MOV(offset(result, bld, i), offset(coarse_size, bld, i));
-      return;
-   }
-
-   brw_check_dynamic_fs_config(abld, fs_prog_data,
-                               INTEL_FS_CONFIG_COARSE_RT_WRITES);
-   for (unsigned i = 0; i < 2; i++) {
-      set_predicate(BRW_PREDICATE_NORMAL,
-                    abld.SEL(offset(result, bld, i),
-                             offset(coarse_size, bld, i),
-                             offset(result, bld, i)));
-   }
+   abld.MOV(offset(result, abld, 0), actual_x);
+   abld.MOV(offset(result, abld, 1), actual_y);
 }
 
 /* Input data is organized with first the per-primitive values, followed
diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c
index a0965841bbc..ae421a88cb8 100644
--- a/src/intel/compiler/brw/brw_nir.c
+++ b/src/intel/compiler/brw/brw_nir.c
@@ -1484,7 +1484,12 @@ lower_frag_shading_rate(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
    nir_def *sr = nir_load_frag_shading_rate_intel(b);
    nir_def *int_rate_x = nir_ushr_imm(b, nir_channel(b, sr, 0), 1);
    nir_def *int_rate_y = nir_ushr_imm(b, nir_channel(b, sr, 1), 1);
-   nir_def *rate = nir_ior(b, nir_ishl_imm(b, int_rate_x, 2), int_rate_y);
+   nir_def *coarse_rate = nir_ior(b, nir_ishl_imm(b, int_rate_x, 2), int_rate_y);
+
+   nir_def *rate = nir_bcsel(
+      b,
+      nir_test_fs_config_intel(b, 1, INTEL_FS_CONFIG_COARSE_RT_WRITES),
+      coarse_rate, nir_imm_int(b, 0));
 
    nir_def_replace(&intrin->def, rate);
 
@@ -1498,6 +1503,100 @@ brw_nir_lower_frag_shading_rate(nir_shader *nir)
                                      nir_metadata_control_flow, NULL);
 }
 
+/* Compile-time knowledge about the fs_config_intel bits.
+ *
+ * known_bits holds the bits whose value is fixed at compile time and
+ * enabled_bits holds the bits that are always set.
+ */
+struct lower_fs_config_state {
+   uint32_t known_bits;
+   uint32_t enabled_bits;
+};
+
+/* Replace a test_fs_config_intel intrinsic with a constant boolean when the
+ * tested bit is known at compile time, or with a test of the dynamically
+ * loaded fs_config_intel value otherwise.
+ */
+static bool
+lower_fs_config_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
+{
+   if (intrin->intrinsic != nir_intrinsic_test_fs_config_intel)
+      return false;
+
+   const uint32_t test_bit = nir_intrinsic_base(intrin);
+   const struct lower_fs_config_state *state = data;
+
+   b->cursor = nir_after_instr(&intrin->instr);
+
+   nir_def *new_val =
+      (test_bit & state->known_bits) ?
+      nir_imm_bool(b, test_bit & state->enabled_bits) :
+      nir_test_mask(b, nir_load_fs_config_intel(b), test_bit);
+   nir_def_replace(&intrin->def, new_val);
+
+   return true;
+}
+
+/* Return the mask of fs_config_intel bits whose controlling key/prog_data
+ * field is equal to comp_value.
+ */
+static uint32_t
+generate_fs_config_state_bits(const struct brw_fs_prog_key *key,
+                              const struct brw_fs_prog_data *prog_data,
+                              enum intel_sometimes comp_value)
+{
+   uint32_t f = 0;
+
+   if (key->multisample_fbo == comp_value)
+      f |= INTEL_FS_CONFIG_MULTISAMPLE_FBO;
+
+   if (prog_data->alpha_to_coverage == comp_value)
+      f |= INTEL_FS_CONFIG_ALPHA_TO_COVERAGE;
+
+   if (prog_data->provoking_vertex_last == comp_value)
+      f |= INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST;
+
+   if (prog_data->persample_dispatch == comp_value) {
+      f |= INTEL_FS_CONFIG_PERSAMPLE_DISPATCH |
+           INTEL_FS_CONFIG_PERSAMPLE_INTERP;
+   }
+
+   if (prog_data->coarse_pixel_dispatch == comp_value)
+      f |= INTEL_FS_CONFIG_COARSE_RT_WRITES;
+
+   if (prog_data->conservative_raster == comp_value)
+      f |= INTEL_FS_CONFIG_CONSERVATIVE_RASTER;
+
+   return f;
+}
+
+/* Lower test_fs_config_intel intrinsics using what the key and prog_data
+ * establish at compile time.
+ */
+bool
+brw_nir_lower_fs_config_intel(nir_shader *nir,
+                              const struct brw_fs_prog_key *key,
+                              const struct brw_fs_prog_data *prog_data)
+{
+   /* Only bits that generate_fs_config_state_bits() reports as NEVER or
+    * ALWAYS are known at compile time. Bits it does not describe (e.g.
+    * INTEL_FS_CONFIG_PER_PRIMITIVE_REMAPPING) must still be tested against
+    * the dynamic fs_config value instead of being folded to false.
+    */
+   const uint32_t never_bits =
+      generate_fs_config_state_bits(key, prog_data, INTEL_NEVER);
+   const uint32_t always_bits =
+      generate_fs_config_state_bits(key, prog_data, INTEL_ALWAYS);
+
+   struct lower_fs_config_state state = {
+      .known_bits = never_bits | always_bits,
+      .enabled_bits = always_bits,
+   };
+
+   return nir_shader_intrinsics_pass(nir, lower_fs_config_intel,
+                                     nir_metadata_control_flow, &state);
+}
+
 void
 brw_nir_lower_fs_inputs(nir_shader *nir,
                         const struct intel_device_info *devinfo,
diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h
index bb509e77099..8742f8e6d5f 100644
--- a/src/intel/compiler/brw/brw_nir.h
+++ b/src/intel/compiler/brw/brw_nir.h
@@ -238,6 +238,9 @@ void brw_nir_lower_mesh_outputs(nir_shader *nir,
 void brw_nir_lower_fs_outputs(nir_shader *nir);
 bool brw_nir_lower_fs_load_output(nir_shader *shader,
                                   const struct brw_fs_prog_key *key);
+bool brw_nir_lower_fs_config_intel(nir_shader *nir,
+                                   const struct brw_fs_prog_key *key,
+                                   const struct brw_fs_prog_data *prog_data);
 
 bool brw_nir_lower_frag_coord_z(nir_shader *nir,
                                 const struct intel_device_info *devinfo);
diff --git a/src/intel/compiler/brw/brw_nir_lower_alpha_to_coverage.c b/src/intel/compiler/brw/brw_nir_lower_alpha_to_coverage.c
index 3dd6c2b7a72..20e024127ea 100644
--- a/src/intel/compiler/brw/brw_nir_lower_alpha_to_coverage.c
+++ b/src/intel/compiler/brw/brw_nir_lower_alpha_to_coverage.c
@@ -144,9 +144,8 @@ brw_nir_lower_alpha_to_coverage(nir_shader *shader)
    nir_def *dither_mask = build_dither_mask(&b, color0);
    dither_mask = nir_iand(&b, sample_mask, dither_mask);
 
-   nir_def *fs_config = nir_load_fs_config_intel(&b);
-   nir_def *alpha_to_coverage =
-      nir_test_mask(&b, fs_config, INTEL_FS_CONFIG_ALPHA_TO_COVERAGE);
+   nir_def *alpha_to_coverage = nir_test_fs_config_intel(
+      &b, 1, INTEL_FS_CONFIG_ALPHA_TO_COVERAGE);
 
    dither_mask = nir_bcsel(&b, alpha_to_coverage, dither_mask,
                            sample_mask_write->src[0].ssa);
diff --git a/src/intel/compiler/brw/brw_nir_lower_fs_barycentrics.c b/src/intel/compiler/brw/brw_nir_lower_fs_barycentrics.c
index 91beac381fa..9313c858611 100644
--- a/src/intel/compiler/brw/brw_nir_lower_fs_barycentrics.c
+++ b/src/intel/compiler/brw/brw_nir_lower_fs_barycentrics.c
@@ -45,9 +45,8 @@ lower_flat_inputs(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
    nir_def *first_vtx = load_input_vertex(b, intrin, 0, intrin->def.num_components);
    nir_def *last_vtx = load_input_vertex(b, intrin, 2, intrin->def.num_components);
 
-   nir_def *fs_config = nir_load_fs_config_intel(b);
-
-   nir_def *last = nir_test_mask(b, fs_config, INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST);
+   nir_def *last = nir_test_fs_config_intel(
+      b, 1, INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST);
    nir_def *input_vertex = nir_bcsel(b, last, last_vtx, first_vtx);
    nir_def_replace(&intrin->def, input_vertex);
 
diff --git a/src/intel/compiler/brw/brw_nir_lower_fully_covered.c b/src/intel/compiler/brw/brw_nir_lower_fully_covered.c
index 7bcc2ea100c..19835146d27 100644
--- a/src/intel/compiler/brw/brw_nir_lower_fully_covered.c
+++ b/src/intel/compiler/brw/brw_nir_lower_fully_covered.c
@@ -39,9 +39,8 @@ lower_fully_covered(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
 
    nir_def *fully_covered = nir_ieq(b, coverage_mask, expected_mask);
 
-   nir_def *fs_config = nir_load_fs_config_intel(b);
-   nir_def *cons_raster_on =
-      nir_test_mask(b, fs_config, INTEL_FS_CONFIG_CONSERVATIVE_RASTER);
+   nir_def *cons_raster_on = nir_test_fs_config_intel(
+      b, 1, INTEL_FS_CONFIG_CONSERVATIVE_RASTER);
    fully_covered = nir_bcsel(b, cons_raster_on, fully_covered,
                              nir_imm_false(b));
 
diff --git a/src/intel/compiler/brw/brw_nir_wa_18019110168.c b/src/intel/compiler/brw/brw_nir_wa_18019110168.c
index 8110059e6fc..47d349b14c6 100644
--- a/src/intel/compiler/brw/brw_nir_wa_18019110168.c
+++ b/src/intel/compiler/brw/brw_nir_wa_18019110168.c
@@ -563,11 +563,11 @@ brw_nir_frag_convert_attrs_prim_to_vert_indirect(struct nir_shader *nir,
 
    per_primitive_stride = align(per_primitive_stride, devinfo->grf_size);
 
-   nir_def *fs_config = nir_load_fs_config_intel(b);
-   nir_def *needs_remapping = nir_test_mask(
-      b, fs_config, INTEL_FS_CONFIG_PER_PRIMITIVE_REMAPPING);
+   nir_def *needs_remapping = nir_test_fs_config_intel(
+      b, 1, INTEL_FS_CONFIG_PER_PRIMITIVE_REMAPPING);
    nir_push_if(b, needs_remapping);
    {
+      nir_def *fs_config = nir_load_fs_config_intel(b);
       nir_def *first_slot =
          nir_ubitfield_extract_imm(
             b, fs_config,
diff --git a/src/intel/compiler/jay/jay_nir.c b/src/intel/compiler/jay/jay_nir.c
index 1b723eea4a2..74cfe7c8962 100644
--- a/src/intel/compiler/jay/jay_nir.c
+++ b/src/intel/compiler/jay/jay_nir.c
@@ -392,26 +392,15 @@ jay_process_nir(const struct intel_device_info *devinfo,
       // TODO
       // NIR_PASS(_, nir, brw_nir_move_interpolation_to_top);
 
-      if (!brw_fs_prog_key_is_dynamic(&key->fs)) {
-         uint32_t f = 0;
+      /* Do this before lower_fs_config_intel so that the pass has the right
+       * information.
+       *
+       * NOTE(review): nr_packed_regs is passed as 0 here and the later call
+       * is skipped for fragment shaders; confirm the real value is not needed.
+       */
+      jay_populate_prog_data(devinfo, nir, prog_data, key, 0);
 
-         if (key->fs.multisample_fbo == INTEL_ALWAYS)
-            f |= INTEL_FS_CONFIG_MULTISAMPLE_FBO;
-
-         if (key->fs.alpha_to_coverage == INTEL_ALWAYS)
-            f |= INTEL_FS_CONFIG_ALPHA_TO_COVERAGE;
-
-         if (key->fs.provoking_vertex_last == INTEL_ALWAYS)
-            f |= INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST;
-
-         if (key->fs.persample_interp == INTEL_ALWAYS) {
-            f |= INTEL_FS_CONFIG_PERSAMPLE_DISPATCH |
-                 INTEL_FS_CONFIG_PERSAMPLE_INTERP;
-         }
-
-         NIR_PASS(_, nir, nir_inline_sysval, nir_intrinsic_load_fs_config_intel,
-                  f);
-      }
+      NIR_PASS(_, nir, brw_nir_lower_fs_config_intel, &key->fs, &prog_data->fs);
    } else {
       brw_nir_apply_key(pt, &key->base, simd_width);
    }
@@ -465,6 +454,7 @@ jay_process_nir(const struct intel_device_info *devinfo,
    nj_index_ssa_defs(nir);
    nir_divergence_analysis(nir);
 
-   jay_populate_prog_data(devinfo, nir, prog_data, key, nr_packed_regs);
+   if (stage != MESA_SHADER_FRAGMENT)
+      jay_populate_prog_data(devinfo, nir, prog_data, key, nr_packed_regs);
    return simd_width;
 }