mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 02:10:11 +01:00
intel/fs: add support for ACCESS_ENABLE_HELPER
v2: Factor out fragment shader masking on send messages (Caio)
Update comments (Caio)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13719>
This commit is contained in:
parent
c199f44d17
commit
9d22f8ed23
3 changed files with 76 additions and 12 deletions
|
|
@ -947,6 +947,10 @@ enum a64_logical_srcs {
|
|||
A64_LOGICAL_SRC,
|
||||
/** Per-opcode immediate argument. Number of dwords, bit size, or atomic op. */
|
||||
A64_LOGICAL_ARG,
|
||||
/**
|
||||
* Some instructions do want to run on helper lanes (like ray queries).
|
||||
*/
|
||||
A64_LOGICAL_ENABLE_HELPERS,
|
||||
|
||||
A64_LOGICAL_NUM_SRCS
|
||||
};
|
||||
|
|
|
|||
|
|
@ -5475,6 +5475,40 @@ emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst)
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Predicate the specified instruction on the vector mask.
|
||||
*/
|
||||
static void
|
||||
emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
assert(bld.shader->stage == MESA_SHADER_FRAGMENT &&
|
||||
bld.group() == inst->group &&
|
||||
bld.dispatch_width() == inst->exec_size);
|
||||
|
||||
const fs_builder ubld = bld.exec_all().group(1, 0);
|
||||
|
||||
const fs_visitor *v = static_cast<const fs_visitor *>(bld.shader);
|
||||
const fs_reg vector_mask = ubld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
ubld.emit(SHADER_OPCODE_READ_SR_REG, vector_mask, brw_imm_ud(3));
|
||||
const unsigned subreg = sample_mask_flag_subreg(v);
|
||||
|
||||
ubld.MOV(brw_flag_subreg(subreg + inst->group / 16), vector_mask);
|
||||
|
||||
if (inst->predicate) {
|
||||
assert(inst->predicate == BRW_PREDICATE_NORMAL);
|
||||
assert(!inst->predicate_inverse);
|
||||
assert(inst->flag_subreg == 0);
|
||||
/* Combine the vector mask with the existing predicate by using a
|
||||
* vertical predication mode.
|
||||
*/
|
||||
inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
|
||||
} else {
|
||||
inst->flag_subreg = subreg;
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
inst->predicate_inverse = false;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
||||
const fs_reg &surface, const fs_reg &surface_handle)
|
||||
|
|
@ -6068,6 +6102,26 @@ emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
|
|||
return header;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_fragment_mask(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
assert(inst->src[A64_LOGICAL_ENABLE_HELPERS].file == IMM);
|
||||
const bool enable_helpers = inst->src[A64_LOGICAL_ENABLE_HELPERS].ud;
|
||||
|
||||
/* If we're a fragment shader, we have to predicate with the sample mask to
|
||||
* avoid helper invocations to avoid helper invocations in instructions
|
||||
* with side effects, unless they are explicitly required.
|
||||
*
|
||||
* There are also special cases when we actually want to run on helpers
|
||||
* (ray queries).
|
||||
*/
|
||||
assert(bld.shader->stage == MESA_SHADER_FRAGMENT);
|
||||
if (enable_helpers)
|
||||
emit_predicate_on_vector_mask(bld, inst);
|
||||
else if (inst->has_side_effects())
|
||||
emit_predicate_on_sample_mask(bld, inst);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
|
|
@ -6083,12 +6137,6 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
|
||||
const bool has_side_effects = inst->has_side_effects();
|
||||
|
||||
/* If the surface message has side effects and we're a fragment shader, we
|
||||
* have to predicate with the sample mask to avoid helper invocations.
|
||||
*/
|
||||
if (has_side_effects && bld.shader->stage == MESA_SHADER_FRAGMENT)
|
||||
emit_predicate_on_sample_mask(bld, inst);
|
||||
|
||||
fs_reg payload = retype(bld.move_to_vgrf(addr, 1), BRW_REGISTER_TYPE_UD);
|
||||
fs_reg payload2 = retype(bld.move_to_vgrf(src, src_comps),
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
|
|
@ -6164,6 +6212,9 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
unreachable("Unknown A64 logical instruction");
|
||||
}
|
||||
|
||||
if (bld.shader->stage == MESA_SHADER_FRAGMENT)
|
||||
emit_fragment_mask(bld, inst);
|
||||
|
||||
/* Update the original instruction. */
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
|
||||
|
|
@ -6193,12 +6244,6 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
const unsigned arg = inst->src[A64_LOGICAL_ARG].ud;
|
||||
const bool has_side_effects = inst->has_side_effects();
|
||||
|
||||
/* If the surface message has side effects and we're a fragment shader, we
|
||||
* have to predicate with the sample mask to avoid helper invocations.
|
||||
*/
|
||||
if (has_side_effects && bld.shader->stage == MESA_SHADER_FRAGMENT)
|
||||
emit_predicate_on_sample_mask(bld, inst);
|
||||
|
||||
fs_reg payload, payload2;
|
||||
unsigned mlen, ex_mlen = 0, header_size = 0;
|
||||
if (inst->opcode == SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL ||
|
||||
|
|
@ -6322,6 +6367,9 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
unreachable("Unknown A64 logical instruction");
|
||||
}
|
||||
|
||||
if (bld.shader->stage == MESA_SHADER_FRAGMENT)
|
||||
emit_fragment_mask(bld, inst);
|
||||
|
||||
/* Update the original instruction. */
|
||||
inst->opcode = SHADER_OPCODE_SEND;
|
||||
inst->mlen = mlen;
|
||||
|
|
|
|||
|
|
@ -4792,6 +4792,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
fs_reg srcs[A64_LOGICAL_NUM_SRCS];
|
||||
srcs[A64_LOGICAL_ADDRESS] = get_nir_src(instr->src[0]);
|
||||
srcs[A64_LOGICAL_SRC] = fs_reg(); /* No source data */
|
||||
srcs[A64_LOGICAL_ENABLE_HELPERS] =
|
||||
brw_imm_ud(nir_intrinsic_access(instr) & ACCESS_INCLUDE_HELPERS);
|
||||
|
||||
if (nir_dest_bit_size(instr->dest) == 32 &&
|
||||
nir_intrinsic_align(instr) >= 4) {
|
||||
|
|
@ -4828,6 +4830,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
|
||||
fs_reg srcs[A64_LOGICAL_NUM_SRCS];
|
||||
srcs[A64_LOGICAL_ADDRESS] = get_nir_src(instr->src[1]);
|
||||
srcs[A64_LOGICAL_ENABLE_HELPERS] =
|
||||
brw_imm_ud(nir_intrinsic_access(instr) & ACCESS_INCLUDE_HELPERS);
|
||||
|
||||
if (nir_src_bit_size(instr->src[0]) == 32 &&
|
||||
nir_intrinsic_align(instr) >= 4) {
|
||||
|
|
@ -4912,6 +4916,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
srcs[A64_LOGICAL_ADDRESS] = addr;
|
||||
srcs[A64_LOGICAL_SRC] = fs_reg(); /* No source data */
|
||||
srcs[A64_LOGICAL_ARG] = brw_imm_ud(instr->num_components);
|
||||
/* This intrinsic loads memory from a uniform address, sometimes
|
||||
* shared across lanes. We never need to mask it.
|
||||
*/
|
||||
srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(0);
|
||||
|
||||
fs_inst *load = ubld.emit(SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL,
|
||||
load_val, srcs, A64_LOGICAL_NUM_SRCS);
|
||||
|
|
@ -5616,6 +5624,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
srcs[A64_LOGICAL_ADDRESS] = address;
|
||||
srcs[A64_LOGICAL_SRC] = fs_reg(); /* No source data */
|
||||
srcs[A64_LOGICAL_ARG] = brw_imm_ud(block);
|
||||
srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(1);
|
||||
ubld.emit(SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
|
||||
retype(byte_offset(dest, loaded * 4), BRW_REGISTER_TYPE_UD),
|
||||
srcs, A64_LOGICAL_NUM_SRCS)->size_written = block_bytes;
|
||||
|
|
@ -5650,6 +5659,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
srcs[A64_LOGICAL_SRC] = retype(byte_offset(src, written * 4),
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
srcs[A64_LOGICAL_ARG] = brw_imm_ud(block);
|
||||
srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(0);
|
||||
|
||||
const fs_builder &ubld = block == 8 ? ubld8 : ubld16;
|
||||
ubld.emit(SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL, fs_reg(),
|
||||
|
|
@ -6054,6 +6064,7 @@ fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
|
|||
srcs[A64_LOGICAL_ADDRESS] = addr;
|
||||
srcs[A64_LOGICAL_SRC] = data;
|
||||
srcs[A64_LOGICAL_ARG] = brw_imm_ud(op);
|
||||
srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(0);
|
||||
|
||||
switch (nir_dest_bit_size(instr->dest)) {
|
||||
case 16: {
|
||||
|
|
@ -6102,6 +6113,7 @@ fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld,
|
|||
srcs[A64_LOGICAL_ADDRESS] = addr;
|
||||
srcs[A64_LOGICAL_SRC] = data;
|
||||
srcs[A64_LOGICAL_ARG] = brw_imm_ud(op);
|
||||
srcs[A64_LOGICAL_ENABLE_HELPERS] = brw_imm_ud(0);
|
||||
|
||||
switch (nir_dest_bit_size(instr->dest)) {
|
||||
case 16: {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue