brw: Switch FS outputs to semantic IO and FRAG_RESULT_DUAL_SRC_BLEND

The new FRAG_RESULT_DUAL_SRC_BLEND option is easier to work with than
looking for FRAG_RESULT_DATA0 with an index of 1.  This also means we
no longer care about the dual source blend index, and can just use the
FRAG_RESULT location.  That, in turn, means we no longer have to
store a (location, index) tuple in driver_location.  And, since all we
need is the location, we can avoid populating driver_location at all
and use nir_io_semantics to get it.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41122>
This commit is contained in:
Kenneth Graunke 2026-04-23 15:39:59 -07:00 committed by Marge Bot
parent 4018aea9fa
commit afb97ff2af
8 changed files with 24 additions and 44 deletions

View file

@ -3665,7 +3665,11 @@ iris_create_shader_state(struct pipe_context *ctx,
const uint64_t color_outputs = info->outputs_written &
~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK) |
BITFIELD64_BIT(FRAG_RESULT_DUAL_SRC_BLEND));
const bool dual_color =
info->outputs_written & BITFIELD64_BIT(FRAG_RESULT_DUAL_SRC_BLEND);
bool can_rearrange_varyings =
util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
@ -3673,7 +3677,7 @@ iris_create_shader_state(struct pipe_context *ctx,
key.fs = (struct iris_fs_prog_key) {
KEY_INIT(base),
.vue_layout = vue_layout(ish->nir->info.separate_shader),
.nr_color_regions = util_bitcount(color_outputs),
.nr_color_regions = util_bitcount(color_outputs) ?: dual_color,
.coherent_fb_fetch = devinfo->ver >= 9 && devinfo->ver < 20,
.input_slots_valid =
can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,

View file

@ -74,6 +74,7 @@ const struct nir_shader_compiler_options brw_scalar_nir_options = {
.support_indirect_outputs = (uint8_t)BITFIELD_MASK(MESA_SHADER_STAGES),
.per_view_unique_driver_locations = true,
.compact_view_index = true,
.io_options = nir_io_use_frag_result_dual_src_blend,
};
struct brw_compiler *

View file

@ -3282,17 +3282,15 @@ alloc_temporary(const brw_builder &bld, unsigned size, brw_reg *regs, unsigned n
}
static brw_reg
alloc_frag_output(nir_to_brw_state &ntb, unsigned location)
alloc_frag_output(nir_to_brw_state &ntb, unsigned l)
{
brw_shader &s = ntb.s;
assert(s.stage == MESA_SHADER_FRAGMENT);
const brw_fs_prog_key *const key =
reinterpret_cast<const brw_fs_prog_key *>(s.key);
const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION);
const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
if (i > 0)
if (l == FRAG_RESULT_DUAL_SRC_BLEND)
return alloc_temporary(ntb.bld, 4, &s.dual_src_output, 1);
else if (l == FRAG_RESULT_COLOR)
@ -3807,11 +3805,9 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
case nir_intrinsic_store_output: {
const brw_reg src = get_nir_src(ntb, instr->src[0], -1);
const unsigned store_offset = nir_src_as_uint(instr->src[1]);
const unsigned location = nir_intrinsic_base(instr) +
SET_FIELD(store_offset, BRW_NIR_FRAG_OUTPUT_LOCATION);
const nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
const brw_reg new_dest =
offset(retype(alloc_frag_output(ntb, location), src.type),
offset(retype(alloc_frag_output(ntb, sem.location), src.type),
bld, nir_intrinsic_component(instr));
brw_combine_with_vec(bld, new_dest, src, instr->num_components);
@ -3819,11 +3815,9 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
}
case nir_intrinsic_load_output: {
const unsigned l = GET_FIELD(nir_intrinsic_base(instr),
BRW_NIR_FRAG_OUTPUT_LOCATION);
assert(l >= FRAG_RESULT_DATA0);
const unsigned load_offset = nir_src_as_uint(instr->src[0]);
const unsigned target = l - FRAG_RESULT_DATA0 + load_offset;
const nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
assert(sem.location >= FRAG_RESULT_DATA0);
const unsigned target = sem.location - FRAG_RESULT_DATA0;
const brw_reg tmp = bld.vgrf(dest.type, 4);
/* Not functional after Gfx20 */

View file

@ -1629,15 +1629,8 @@ brw_nir_lower_mesh_outputs(nir_shader *nir,
void
brw_nir_lower_fs_outputs(nir_shader *nir)
{
nir_foreach_shader_out_variable(var, nir) {
var->data.driver_location =
SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
}
NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4, 0);
NIR_PASS(_, nir, brw_nir_lower_16bit_io, nir_var_shader_out);
nir->info.disable_output_offset_src_constant_folding = true;
}
static bool

View file

@ -350,11 +350,6 @@ brw_uniform_block_size(const struct intel_device_info *devinfo,
void brw_nir_optimize(struct brw_pass_tracker *pt);
#define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
#define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0)
#define BRW_NIR_FRAG_OUTPUT_LOCATION_SHIFT 1
#define BRW_NIR_FRAG_OUTPUT_LOCATION_MASK INTEL_MASK(31, 1)
bool brw_nir_move_interpolation_to_top(nir_shader *nir);
nir_def *brw_nir_load_global_const(nir_builder *b,
nir_intrinsic_instr *load_uniform,

View file

@ -91,23 +91,16 @@ brw_nir_lower_alpha_to_coverage(nir_shader *shader)
assert(block->cf_node.parent == &impl->cf_node);
assert(nir_cf_node_is_last(&block->cf_node));
/* See store_output in brw_shader::nir_emit_fs_intrinsic */
const unsigned store_offset = nir_src_as_uint(intrin->src[1]);
const unsigned driver_location = nir_intrinsic_base(intrin) +
SET_FIELD(store_offset, BRW_NIR_FRAG_OUTPUT_LOCATION);
const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
/* Extract the FRAG_RESULT */
const unsigned location =
GET_FIELD(driver_location, BRW_NIR_FRAG_OUTPUT_LOCATION);
if (location == FRAG_RESULT_SAMPLE_MASK) {
if (sem.location == FRAG_RESULT_SAMPLE_MASK) {
assert(sample_mask_write == NULL);
sample_mask_write = intrin;
sample_mask_write_first = (color0_write == NULL);
}
if (location == FRAG_RESULT_COLOR ||
location == FRAG_RESULT_DATA0) {
if (sem.location == FRAG_RESULT_COLOR ||
sem.location == FRAG_RESULT_DATA0) {
uint32_t mask = nir_intrinsic_write_mask(intrin) <<
nir_intrinsic_component(intrin);
/* need the w component */

View file

@ -20,11 +20,9 @@ brw_nir_lower_fs_load_output_instr(nir_builder *b,
const struct brw_fs_prog_key *key = data;
const unsigned l = GET_FIELD(nir_intrinsic_base(intrin),
BRW_NIR_FRAG_OUTPUT_LOCATION);
assert(l >= FRAG_RESULT_DATA0);
const unsigned load_offset = nir_src_as_uint(intrin->src[0]);
const unsigned target = l - FRAG_RESULT_DATA0 + load_offset;
const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
assert(sem.location >= FRAG_RESULT_DATA0);
const unsigned target = sem.location - FRAG_RESULT_DATA0;
/* Only used by Iris that never sets this to SOMETIMES */
assert(key->multisample_fbo != INTEL_SOMETIMES);

View file

@ -1279,7 +1279,9 @@ anv_shader_compute_fragment_rts(const struct intel_device_info *devinfo,
assert(shader_data->bind_map.surface_count == 0);
nir_shader *nir = shader_data->info->nir;
const uint64_t rt_mask = nir->info.outputs_written >> FRAG_RESULT_DATA0;
const uint64_t rt_mask =
(nir->info.outputs_written &
~BITFIELD_BIT(FRAG_RESULT_DUAL_SRC_BLEND)) >> FRAG_RESULT_DATA0;
const unsigned num_rts = util_last_bit64(rt_mask);
struct anv_pipeline_binding rt_bindings[MAX_RTS];