brw/jay: move some coarse lowering to NIR

We add a pass to allow testing partially known fs config bits (main
user is DX11 always disabling VRS/coarse).

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41529>
This commit is contained in:
Lionel Landwerlin 2026-05-11 16:47:36 +03:00 committed by Marge Bot
parent d0e0a26776
commit df5a6d7b87
11 changed files with 105 additions and 93 deletions

View file

@ -366,6 +366,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
case nir_intrinsic_load_alpha_to_coverage_enable_ir3:
case nir_intrinsic_load_frag_shading_rate_intel:
case nir_intrinsic_load_msaa_rate_intel:
case nir_intrinsic_test_fs_config_intel:
is_divergent = false;
break;

View file

@ -2807,9 +2807,13 @@ load("push_data_intel", [1], [BASE, RANGE, ACCESS], [CAN_ELIMINATE, CAN_REORDER]
# Dynamic tessellation parameters (see intel_tess_config).
system_value("tess_config_intel", 1)
# Dynamic fragment shader parameters (see intel_fs_config) .
# Dynamic fragment shader parameters (see intel_fs_config).
system_value("fs_config_intel", 1)
# Test a bit in fs_config_intel.
intrinsic("test_fs_config_intel", dest_comp=1, src_comp=[],
indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
# The (linear) local invocation index provided in the payload of mesh/task shaders.
system_value("local_invocation_index_intel", 1)

View file

@ -1527,31 +1527,7 @@ brw_compile_fs(const struct brw_compiler *compiler,
if (prog_data->coarse_pixel_dispatch != INTEL_NEVER)
BRW_NIR_PASS(brw_nir_lower_frag_coord_z, devinfo);
if (!brw_fs_prog_key_is_dynamic(key)) {
uint32_t f = 0;
if (key->multisample_fbo == INTEL_ALWAYS)
f |= INTEL_FS_CONFIG_MULTISAMPLE_FBO;
if (key->alpha_to_coverage == INTEL_ALWAYS)
f |= INTEL_FS_CONFIG_ALPHA_TO_COVERAGE;
if (key->provoking_vertex_last == INTEL_ALWAYS)
f |= INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST;
if (key->persample_interp == INTEL_ALWAYS) {
f |= INTEL_FS_CONFIG_PERSAMPLE_DISPATCH |
INTEL_FS_CONFIG_PERSAMPLE_INTERP;
}
if (prog_data->coarse_pixel_dispatch == INTEL_ALWAYS)
f |= INTEL_FS_CONFIG_COARSE_RT_WRITES;
if (key->conservative_raster == INTEL_ALWAYS)
f |= INTEL_FS_CONFIG_CONSERVATIVE_RASTER;
BRW_NIR_PASS(nir_inline_sysval, nir_intrinsic_load_fs_config_intel, f);
}
BRW_NIR_PASS(brw_nir_lower_fs_config_intel, key, prog_data);
brw_postprocess_nir_opts(pt);

View file

@ -3599,24 +3599,10 @@ static void
emit_frag_shading_rate_setup(nir_to_brw_state &ntb, brw_reg result)
{
const intel_device_info *devinfo = ntb.devinfo;
const brw_builder &bld = ntb.bld;
struct brw_fs_prog_data *fs_prog_data =
brw_fs_prog_data(bld.shader->prog_data);
const brw_builder abld = bld.annotate("compute fragment size");
const brw_builder &abld = ntb.bld.annotate("compute fragment size");
result.type = BRW_TYPE_UD;
bld.MOV(offset(result, bld, 0), brw_imm_ud(1));
bld.MOV(offset(result, bld, 1), brw_imm_ud(1));
/* Coarse pixel shading size fields overlap with other fields if not in
* coarse pixel dispatch mode, so report (1, 1) when that's not the case.
*/
if (fs_prog_data->coarse_pixel_dispatch == INTEL_NEVER)
return;
assert(devinfo->ver >= 11);
/* r1.0 - 0:7 ActualCoarsePixelShadingSize.X */
@ -3624,25 +3610,8 @@ emit_frag_shading_rate_setup(nir_to_brw_state &ntb, brw_reg result)
/* r1.0 - 15:8 ActualCoarsePixelShadingSize.Y */
brw_reg actual_y = byte_offset(actual_x, 1);
brw_reg coarse_size = abld.vgrf(BRW_TYPE_UD, 2);
bld.MOV(offset(coarse_size, bld, 0), actual_x);
bld.MOV(offset(coarse_size, bld, 1), actual_y);
if (fs_prog_data->coarse_pixel_dispatch == INTEL_ALWAYS) {
for (unsigned i = 0; i < 2; i++)
bld.MOV(offset(result, bld, i), offset(coarse_size, bld, i));
return;
}
brw_check_dynamic_fs_config(abld, fs_prog_data,
INTEL_FS_CONFIG_COARSE_RT_WRITES);
for (unsigned i = 0; i < 2; i++) {
set_predicate(BRW_PREDICATE_NORMAL,
abld.SEL(offset(result, bld, i),
offset(coarse_size, bld, i),
offset(result, bld, i)));
}
abld.MOV(offset(result, abld, 0), actual_x);
abld.MOV(offset(result, abld, 1), actual_y);
}
/* Input data is organized with first the per-primitive values, followed

View file

@ -1484,7 +1484,12 @@ lower_frag_shading_rate(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
nir_def *sr = nir_load_frag_shading_rate_intel(b);
nir_def *int_rate_x = nir_ushr_imm(b, nir_channel(b, sr, 0), 1);
nir_def *int_rate_y = nir_ushr_imm(b, nir_channel(b, sr, 1), 1);
nir_def *rate = nir_ior(b, nir_ishl_imm(b, int_rate_x, 2), int_rate_y);
nir_def *coarse_rate = nir_ior(b, nir_ishl_imm(b, int_rate_x, 2), int_rate_y);
nir_def *rate = nir_bcsel(
b,
nir_test_fs_config_intel(b, 1, INTEL_FS_CONFIG_COARSE_RT_WRITES),
coarse_rate, nir_imm_int(b, 0));
nir_def_replace(&intrin->def, rate);
@ -1498,6 +1503,76 @@ brw_nir_lower_frag_shading_rate(nir_shader *nir)
nir_metadata_control_flow, NULL);
}
/* Compile-time knowledge about the fs_config_intel bits, consumed by
 * lower_fs_config_intel() to fold test_fs_config_intel intrinsics.
 */
struct lower_fs_config_state {
/* Mask of INTEL_FS_CONFIG_* bits whose value is fixed at compile time. */
uint32_t known_bits;
/* Among the known bits, the ones that are enabled (INTEL_ALWAYS). */
uint32_t enabled_bits;
};
/* nir_shader_intrinsics_pass callback: rewrite one intrinsic.
 *
 * A test_fs_config_intel intrinsic whose tested bit is known at compile
 * time (per the lower_fs_config_state passed through data) is folded to
 * an immediate boolean; otherwise it is lowered to a runtime mask test
 * on the fs_config_intel system value.  Returns true when a rewrite
 * happened.
 */
static bool
lower_fs_config_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   if (intrin->intrinsic != nir_intrinsic_test_fs_config_intel)
      return false;

   const struct lower_fs_config_state *cfg = data;
   const uint32_t bit = nir_intrinsic_base(intrin);

   b->cursor = nir_after_instr(&intrin->instr);

   nir_def *lowered;
   if (bit & cfg->known_bits) {
      /* Bit value known at compile time: fold to an immediate. */
      lowered = nir_imm_bool(b, (bit & cfg->enabled_bits) != 0);
   } else {
      /* Unknown until draw time: test the bit in the system value. */
      lowered = nir_test_mask(b, nir_load_fs_config_intel(b), bit);
   }

   nir_def_replace(&intrin->def, lowered);
   return true;
}
/* Collect the INTEL_FS_CONFIG_* bits whose matching tri-state field in
 * key/prog_data equals comp_value (e.g. INTEL_ALWAYS or INTEL_SOMETIMES).
 */
static uint32_t
generate_fs_config_state_bits(const struct brw_fs_prog_key *key,
                              const struct brw_fs_prog_data *prog_data,
                              enum intel_sometimes comp_value)
{
   /* Persample dispatch & interpolation are driven by the same state. */
   const uint32_t persample_bits = INTEL_FS_CONFIG_PERSAMPLE_DISPATCH |
                                   INTEL_FS_CONFIG_PERSAMPLE_INTERP;

   return (key->multisample_fbo == comp_value ?
              INTEL_FS_CONFIG_MULTISAMPLE_FBO : 0) |
          (prog_data->alpha_to_coverage == comp_value ?
              INTEL_FS_CONFIG_ALPHA_TO_COVERAGE : 0) |
          (prog_data->provoking_vertex_last == comp_value ?
              INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST : 0) |
          (prog_data->persample_dispatch == comp_value ?
              persample_bits : 0) |
          (prog_data->coarse_pixel_dispatch == comp_value ?
              INTEL_FS_CONFIG_COARSE_RT_WRITES : 0) |
          (prog_data->conservative_raster == comp_value ?
              INTEL_FS_CONFIG_CONSERVATIVE_RASTER : 0);
}
/* Lower all test_fs_config_intel intrinsics in the shader.
 *
 * Bits whose key/prog_data state is INTEL_SOMETIMES stay dynamic and are
 * read from the fs_config_intel system value at runtime; every other bit
 * is considered known and folded to a constant.  Returns whether any
 * intrinsic was rewritten.
 */
bool
brw_nir_lower_fs_config_intel(nir_shader *nir,
                              const struct brw_fs_prog_key *key,
                              const struct brw_fs_prog_data *prog_data)
{
   const uint32_t dynamic_bits =
      generate_fs_config_state_bits(key, prog_data, INTEL_SOMETIMES);
   const uint32_t always_bits =
      generate_fs_config_state_bits(key, prog_data, INTEL_ALWAYS);

   struct lower_fs_config_state state = {
      .known_bits = ~dynamic_bits,
      .enabled_bits = always_bits,
   };

   return nir_shader_intrinsics_pass(nir, lower_fs_config_intel,
                                     nir_metadata_control_flow, &state);
}
void
brw_nir_lower_fs_inputs(nir_shader *nir,
const struct intel_device_info *devinfo,

View file

@ -238,6 +238,9 @@ void brw_nir_lower_mesh_outputs(nir_shader *nir,
void brw_nir_lower_fs_outputs(nir_shader *nir);
bool brw_nir_lower_fs_load_output(nir_shader *shader,
const struct brw_fs_prog_key *key);
bool brw_nir_lower_fs_config_intel(nir_shader *nir,
const struct brw_fs_prog_key *key,
const struct brw_fs_prog_data *prog_data);
bool brw_nir_lower_frag_coord_z(nir_shader *nir,
const struct intel_device_info *devinfo);

View file

@ -144,9 +144,8 @@ brw_nir_lower_alpha_to_coverage(nir_shader *shader)
nir_def *dither_mask = build_dither_mask(&b, color0);
dither_mask = nir_iand(&b, sample_mask, dither_mask);
nir_def *fs_config = nir_load_fs_config_intel(&b);
nir_def *alpha_to_coverage =
nir_test_mask(&b, fs_config, INTEL_FS_CONFIG_ALPHA_TO_COVERAGE);
nir_def *alpha_to_coverage = nir_test_fs_config_intel(
&b, 1, INTEL_FS_CONFIG_ALPHA_TO_COVERAGE);
dither_mask = nir_bcsel(&b, alpha_to_coverage,
dither_mask, sample_mask_write->src[0].ssa);

View file

@ -45,9 +45,8 @@ lower_flat_inputs(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
nir_def *first_vtx = load_input_vertex(b, intrin, 0, intrin->def.num_components);
nir_def *last_vtx = load_input_vertex(b, intrin, 2, intrin->def.num_components);
nir_def *fs_config = nir_load_fs_config_intel(b);
nir_def *last = nir_test_mask(b, fs_config, INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST);
nir_def *last = nir_test_fs_config_intel(
b, 1, INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST);
nir_def *input_vertex = nir_bcsel(b, last, last_vtx, first_vtx);
nir_def_replace(&intrin->def, input_vertex);

View file

@ -39,9 +39,8 @@ lower_fully_covered(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
nir_def *fully_covered = nir_ieq(b, coverage_mask, expected_mask);
nir_def *fs_config = nir_load_fs_config_intel(b);
nir_def *cons_raster_on =
nir_test_mask(b, fs_config, INTEL_FS_CONFIG_CONSERVATIVE_RASTER);
nir_def *cons_raster_on = nir_test_fs_config_intel(
b, 1, INTEL_FS_CONFIG_CONSERVATIVE_RASTER);
fully_covered = nir_bcsel(b, cons_raster_on, fully_covered, nir_imm_false(b));

View file

@ -563,11 +563,11 @@ brw_nir_frag_convert_attrs_prim_to_vert_indirect(struct nir_shader *nir,
per_primitive_stride = align(per_primitive_stride, devinfo->grf_size);
nir_def *fs_config = nir_load_fs_config_intel(b);
nir_def *needs_remapping = nir_test_mask(
b, fs_config, INTEL_FS_CONFIG_PER_PRIMITIVE_REMAPPING);
nir_def *needs_remapping = nir_test_fs_config_intel(
b, 1, INTEL_FS_CONFIG_PER_PRIMITIVE_REMAPPING);
nir_push_if(b, needs_remapping);
{
nir_def *fs_config = nir_load_fs_config_intel(b);
nir_def *first_slot =
nir_ubitfield_extract_imm(
b, fs_config,

View file

@ -392,26 +392,12 @@ jay_process_nir(const struct intel_device_info *devinfo,
// TODO
// NIR_PASS(_, nir, brw_nir_move_interpolation_to_top);
if (!brw_fs_prog_key_is_dynamic(&key->fs)) {
uint32_t f = 0;
/* Do this before lower_fs_config_intel so that the pass has the right
* information.
*/
jay_populate_prog_data(devinfo, nir, prog_data, key, 0);
if (key->fs.multisample_fbo == INTEL_ALWAYS)
f |= INTEL_FS_CONFIG_MULTISAMPLE_FBO;
if (key->fs.alpha_to_coverage == INTEL_ALWAYS)
f |= INTEL_FS_CONFIG_ALPHA_TO_COVERAGE;
if (key->fs.provoking_vertex_last == INTEL_ALWAYS)
f |= INTEL_FS_CONFIG_PROVOKING_VERTEX_LAST;
if (key->fs.persample_interp == INTEL_ALWAYS) {
f |= INTEL_FS_CONFIG_PERSAMPLE_DISPATCH |
INTEL_FS_CONFIG_PERSAMPLE_INTERP;
}
NIR_PASS(_, nir, nir_inline_sysval, nir_intrinsic_load_fs_config_intel,
f);
}
NIR_PASS(_, nir, brw_nir_lower_fs_config_intel, &key->fs, &prog_data->fs);
} else {
brw_nir_apply_key(pt, &key->base, simd_width);
}
@ -465,6 +451,7 @@ jay_process_nir(const struct intel_device_info *devinfo,
nj_index_ssa_defs(nir);
nir_divergence_analysis(nir);
jay_populate_prog_data(devinfo, nir, prog_data, key, nr_packed_regs);
if (stage != MESA_SHADER_FRAGMENT)
jay_populate_prog_data(devinfo, nir, prog_data, key, nr_packed_regs);
return simd_width;
}