brw: Add brw_fb_write_inst

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36730>
This commit is contained in:
Caio Oliveira 2025-08-29 20:29:26 -07:00 committed by Marge Bot
parent d06c0a370e
commit df2b5fb03f
6 changed files with 75 additions and 51 deletions

View file

@ -18,7 +18,7 @@
#include <memory>
static brw_inst *
static brw_fb_write_inst *
brw_emit_single_fb_write(brw_shader &s, const brw_builder &bld,
brw_reg color0, brw_reg color1,
brw_reg src0_alpha,
@ -32,10 +32,6 @@ brw_emit_single_fb_write(brw_shader &s, const brw_builder &bld,
sources[FB_WRITE_LOGICAL_SRC_COLOR0] = color0;
sources[FB_WRITE_LOGICAL_SRC_COLOR1] = color1;
sources[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA] = src0_alpha;
sources[FB_WRITE_LOGICAL_SRC_TARGET] = brw_imm_ud(target);
sources[FB_WRITE_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(components);
sources[FB_WRITE_LOGICAL_SRC_NULL_RT] = brw_imm_ud(null_rt);
sources[FB_WRITE_LOGICAL_SRC_LAST_RT] = brw_imm_ud(false);
if (prog_data->uses_omask)
sources[FB_WRITE_LOGICAL_SRC_OMASK] = s.sample_mask;
@ -44,8 +40,12 @@ brw_emit_single_fb_write(brw_shader &s, const brw_builder &bld,
if (s.nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
sources[FB_WRITE_LOGICAL_SRC_SRC_STENCIL] = s.frag_stencil;
brw_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, brw_reg(),
sources, ARRAY_SIZE(sources));
brw_fb_write_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, brw_reg(),
sources, ARRAY_SIZE(sources))->as_fb_write();
write->target = target;
write->components = components;
write->null_rt = null_rt;
write->last_rt = false;
if (prog_data->uses_kill) {
write->predicate = BRW_PREDICATE_NORMAL;
@ -64,7 +64,7 @@ brw_do_emit_fb_writes(brw_shader &s, int nr_color_regions, bool replicate_alpha)
const bool double_rt_writes = s.devinfo->ver == 11 &&
prog_data->coarse_pixel_dispatch == INTEL_SOMETIMES;
brw_inst *inst = NULL;
brw_fb_write_inst *write = NULL;
for (int target = 0; target < nr_color_regions; target++) {
/* Skip over outputs that weren't written. */
if (s.outputs[target].file == BAD_FILE)
@ -77,18 +77,18 @@ brw_do_emit_fb_writes(brw_shader &s, int nr_color_regions, bool replicate_alpha)
if (replicate_alpha && target != 0)
src0_alpha = offset(s.outputs[0], bld, 3);
inst = brw_emit_single_fb_write(s, abld, s.outputs[target],
s.dual_src_output, src0_alpha,
target, 4, false);
write = brw_emit_single_fb_write(s, abld, s.outputs[target],
s.dual_src_output, src0_alpha,
target, 4, false);
}
bool flag_dummy_message = inst && double_rt_writes;
if (inst) {
inst->src[FB_WRITE_LOGICAL_SRC_LAST_RT] = brw_imm_ud(true);
inst->eot = true;
bool flag_dummy_message = write && double_rt_writes;
if (write) {
write->last_rt = true;
write->eot = true;
}
if (inst == NULL) {
if (write == NULL) {
struct brw_wm_prog_key *key = (brw_wm_prog_key*) s.key;
/* Disable null_rt if any non color output is written or if
* alpha_to_coverage can be enabled. Since the alpha_to_coverage bit is
@ -111,11 +111,11 @@ brw_do_emit_fb_writes(brw_shader &s, int nr_color_regions, bool replicate_alpha)
const brw_reg tmp = bld.vgrf(BRW_TYPE_UD, 4);
bld.LOAD_PAYLOAD(tmp, srcs, 4, 0);
inst = brw_emit_single_fb_write(s, bld, tmp, reg_undef, reg_undef,
write = brw_emit_single_fb_write(s, bld, tmp, reg_undef, reg_undef,
0, 4, use_null_rt);
inst->src[FB_WRITE_LOGICAL_SRC_LAST_RT] = brw_imm_ud(true);
inst->has_no_mask_send_params = flag_dummy_message;
inst->eot = true;
write->last_rt = true;
write->has_no_mask_send_params = flag_dummy_message;
write->eot = true;
}
}

View file

@ -598,10 +598,6 @@ enum fb_write_logical_srcs {
FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */
FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */
FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */
FB_WRITE_LOGICAL_SRC_TARGET, /* REQUIRED */
FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */
FB_WRITE_LOGICAL_SRC_NULL_RT, /* Null RT write */
FB_WRITE_LOGICAL_SRC_LAST_RT, /* Last RT? (bool as UD immediate) */
FB_WRITE_LOGICAL_NUM_SRCS
};

View file

@ -19,6 +19,7 @@ brw_inst_kind_size(brw_inst_kind kind)
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_dpas_inst));
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_load_payload_inst));
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_urb_inst));
STATIC_ASSERT(sizeof(brw_send_inst) >= sizeof(brw_fb_write_inst));
/* TODO: Temporarily here to ensure all instructions can be converted to
* SEND. Once all new kinds are added, change so that BASE allocate only
@ -197,6 +198,9 @@ brw_inst_kind_for_opcode(enum opcode opcode)
case SHADER_OPCODE_URB_WRITE_LOGICAL:
return BRW_KIND_URB;
case FS_OPCODE_FB_WRITE_LOGICAL:
return BRW_KIND_FB_WRITE;
default:
return BRW_KIND_BASE;
}
@ -446,10 +450,9 @@ brw_inst::components_read(unsigned i) const
return 1;
case FS_OPCODE_FB_WRITE_LOGICAL:
assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
/* First/second FB write color. */
if (i < 2)
return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
return as_fb_write()->components;
else
return 1;

View file

@ -47,6 +47,7 @@ enum ENUM_PACKED brw_inst_kind {
BRW_KIND_DPAS,
BRW_KIND_LOAD_PAYLOAD,
BRW_KIND_URB,
BRW_KIND_FB_WRITE,
};
brw_inst_kind brw_inst_kind_for_opcode(enum opcode opcode);
@ -78,6 +79,7 @@ struct brw_inst : brw_exec_node {
KIND_HELPERS(as_dpas, brw_dpas_inst, BRW_KIND_DPAS);
KIND_HELPERS(as_load_payload, brw_load_payload_inst, BRW_KIND_LOAD_PAYLOAD);
KIND_HELPERS(as_urb, brw_urb_inst, BRW_KIND_URB);
KIND_HELPERS(as_fb_write, brw_fb_write_inst, BRW_KIND_FB_WRITE);
#undef KIND_HELPERS
@ -311,6 +313,13 @@ struct brw_urb_inst : brw_inst {
uint8_t components;
};
/**
 * Instruction kind used for FS_OPCODE_FB_WRITE_LOGICAL (BRW_KIND_FB_WRITE).
 *
 * Carries the per-write parameters as plain fields on the instruction;
 * the payload values (colors, depth, stencil, oMask, ...) remain in src[].
 */
struct brw_fb_write_inst : brw_inst {
/** Count reported by components_read() for the two color sources. */
uint8_t components;
/** Target index of this write; forwarded to brw_fb_write_desc() on lowering. */
uint8_t target;
/** Null RT write. */
bool null_rt;
/** Set on the final FB write emitted; forwarded to brw_fb_write_desc(). */
bool last_rt;
};
/**
* Make the execution of \p inst dependent on the evaluation of a possibly
* inverted predicate.

View file

@ -294,28 +294,22 @@ setup_color_payload(const brw_builder &bld, const brw_wm_prog_key *key,
}
static void
lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst,
lower_fb_write_logical_send(const brw_builder &bld, brw_fb_write_inst *write,
const struct brw_wm_prog_data *prog_data,
const brw_wm_prog_key *key,
const brw_fs_thread_payload &fs_payload)
{
assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
assert(inst->src[FB_WRITE_LOGICAL_SRC_NULL_RT].file == IMM);
assert(inst->src[FB_WRITE_LOGICAL_SRC_LAST_RT].file == IMM);
assert(inst->src[FB_WRITE_LOGICAL_SRC_TARGET].file == IMM);
const intel_device_info *devinfo = bld.shader->devinfo;
const brw_reg color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0];
const brw_reg color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1];
const brw_reg src0_alpha = inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
const brw_reg src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
const brw_reg src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
brw_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
const unsigned components =
inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
const unsigned target = inst->src[FB_WRITE_LOGICAL_SRC_TARGET].ud;
const bool null_rt = inst->src[FB_WRITE_LOGICAL_SRC_NULL_RT].ud != 0;
const bool last_rt = inst->src[FB_WRITE_LOGICAL_SRC_LAST_RT].ud != 0;
const brw_reg color0 = write->src[FB_WRITE_LOGICAL_SRC_COLOR0];
const brw_reg color1 = write->src[FB_WRITE_LOGICAL_SRC_COLOR1];
const brw_reg src0_alpha = write->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA];
const brw_reg src_depth = write->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH];
const brw_reg src_stencil = write->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
brw_reg sample_mask = write->src[FB_WRITE_LOGICAL_SRC_OMASK];
const unsigned components = write->components;
const unsigned target = write->target;
const bool null_rt = write->null_rt;
const bool last_rt = write->last_rt;
assert(target != 0 || src0_alpha.file == BAD_FILE);
@ -391,7 +385,7 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst,
header_size = length;
if (fs_payload.aa_dest_stencil_reg[0]) {
assert(inst->group < 16);
assert(write->group < 16);
sources[length] = retype(brw_allocate_vgrf_units(*bld.shader, 1), BRW_TYPE_F);
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
.MOV(sources[length],
@ -427,7 +421,7 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst,
bld.exec_all().annotate("FB write oMask")
.MOV(horiz_offset(retype(tmp, BRW_TYPE_UW),
inst->group % (16 * reg_unit(devinfo))),
write->group % (16 * reg_unit(devinfo))),
sample_mask);
for (unsigned i = 0; i < reg_unit(devinfo); i++)
@ -465,17 +459,17 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst,
payload.nr = brw_allocate_vgrf_units(*bld.shader, regs_written(load)).nr;
load->dst = payload;
uint32_t msg_ctl = brw_fb_write_msg_control(inst, prog_data);
uint32_t msg_ctl = brw_fb_write_msg_control(write, prog_data);
/* XXX - Bit 13 Per-sample PS enable */
uint32_t desc =
(inst->group / 16) << 11 | /* rt slot group */
(write->group / 16) << 11 | /* rt slot group */
brw_fb_write_desc(devinfo, target, msg_ctl, last_rt,
0 /* coarse_rt_write */);
brw_reg desc_reg = brw_imm_ud(0);
if (prog_data->coarse_pixel_dispatch == INTEL_SOMETIMES &&
!inst->has_no_mask_send_params) {
!write->has_no_mask_send_params) {
assert(devinfo->ver >= 11);
if (devinfo->ver != 11) {
const brw_builder &ubld =
@ -506,8 +500,8 @@ lower_fb_write_logical_send(const brw_builder &bld, brw_inst *inst,
(src0_alpha.file != BAD_FILE) << 15;
}
brw_send_inst *send = brw_transform_inst_to_send(bld, inst);
inst = NULL;
brw_send_inst *send = brw_transform_inst_to_send(bld, write);
write = NULL;
send->desc = desc;
send->ex_desc = ex_desc;
@ -2617,7 +2611,7 @@ brw_lower_logical_sends(brw_shader &s)
switch (inst->opcode) {
case FS_OPCODE_FB_WRITE_LOGICAL:
assert(s.stage == MESA_SHADER_FRAGMENT);
lower_fb_write_logical_send(ibld, inst,
lower_fb_write_logical_send(ibld, inst->as_fb_write(),
brw_wm_prog_data(s.prog_data),
(const brw_wm_prog_key *)s.key,
s.fs_payload());

View file

@ -296,6 +296,15 @@ urb_inst_match(brw_urb_inst *a, brw_urb_inst *b)
a->components == b->components;
}
/**
 * Compare the FB-write-specific fields of two instructions.
 *
 * Returns true only when \p a and \p b agree on components, target,
 * null_rt and last_rt.  Fields inherited from brw_inst are not examined
 * here.
 */
static bool
fb_write_inst_match(brw_fb_write_inst *a, brw_fb_write_inst *b)
{
   /* Payload layout parameters first. */
   if (a->components != b->components || a->target != b->target)
      return false;

   /* Then the render-target flags. */
   if (a->null_rt != b->null_rt)
      return false;

   return a->last_rt == b->last_rt;
}
static bool
instructions_match(brw_inst *a, brw_inst *b, bool *negate)
{
@ -309,6 +318,7 @@ instructions_match(brw_inst *a, brw_inst *b, bool *negate)
(a->kind != BRW_KIND_LOAD_PAYLOAD ||
load_payload_inst_match(a->as_load_payload(), b->as_load_payload())) &&
(a->kind != BRW_KIND_URB || urb_inst_match(a->as_urb(), b->as_urb())) &&
(a->kind != BRW_KIND_FB_WRITE || fb_write_inst_match(a->as_fb_write(), b->as_fb_write())) &&
a->exec_size == b->exec_size &&
a->group == b->group &&
a->predicate == b->predicate &&
@ -454,6 +464,18 @@ hash_inst(const void *v)
break;
}
case BRW_KIND_FB_WRITE: {
const brw_fb_write_inst *fb_write = inst->as_fb_write();
const uint8_t fb_write_u8data[] = {
fb_write->components,
fb_write->target,
fb_write->null_rt,
fb_write->last_rt,
};
hash = HASH(hash, fb_write_u8data);
break;
}
case BRW_KIND_BASE:
/* Nothing else to do. */
break;