diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index f7fa6ad1e65..1c1ee70c6b1 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -383,10 +383,6 @@ index("bool", "explicit_coord") index("bool", "src_is_reg") index("bool", "dst_is_reg") -# For an Intel render target store, whether this signals end-of-thread. Must be -# the last instruction. -index("bool", "eot") - # The index of the format string used by a printf. (u_printf_info element of the shader) index("unsigned", "fmt_idx") # for NV coop matrix - num of matrix in load 1/2/4 @@ -2665,8 +2661,8 @@ intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32], indices=[PARAM_IDX, BASE], flags=[CAN_ELIMINATE, CAN_REORDER]) # Write a render target -# src[] = { payload, 2x32 descriptor, predicate } -intrinsic("store_render_target_intel", [-1, 2, 1], indices=[EOT], bit_sizes=[32]) +# src[] = { color, src0_alpha, omask, depth, stencil, predicate } +intrinsic("store_render_target_intel", [4, 1, 1, 1, 1, 1], indices=[TARGET], bit_sizes=[32, 32, 16, 32, 32, 1]) # Shuffle with an offset in bytes instead of a lane index. # src[] = { payload, lane offset in bytes } diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c index 8f1dabe5196..c88c4b6918b 100644 --- a/src/intel/compiler/jay/jay_from_nir.c +++ b/src/intel/compiler/jay/jay_from_nir.c @@ -750,34 +750,88 @@ jay_emit_derivative(jay_builder *b, jay_negate(jay_QUAD_SWIZZLE_u32(b, val, swz0))); } +static inline jay_def +optional_src(nir_src nsrc) +{ + return nir_src_is_undef(nsrc) ? jay_null() : nj_src(nsrc); +} + +static bool +scalars_equal(nir_scalar a, nir_scalar b) +{ + return nir_scalar_equal(a, b) || + (nir_scalar_is_const(a) && + nir_scalar_is_const(b) && + nir_scalar_as_uint(a) == nir_scalar_as_uint(b)); +} + static void jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr) { - jay_def data = nj_src(intr->src[0]); - jay_def srcs[8]; + const struct intel_device_info *devinfo = b->shader->devinfo; + jay_def colour = nj_src(intr->src[0]); + jay_def src0_alpha = optional_src(intr->src[1]); + jay_def omask = optional_src(intr->src[2]); + jay_def depth = optional_src(intr->src[3]); + jay_def stencil = optional_src(intr->src[4]); + const bool null_rt = ((signed) nir_intrinsic_target(intr)) < 0; + const int target = MAX2(((signed) nir_intrinsic_target(intr)), 0); + const bool last = !nir_instr_next(&intr->instr); - /* Optimize unconditional discards. Should probably do this in NIR. */ - bool trivial = - nir_src_is_const(intr->src[2]) && nir_src_as_bool(intr->src[2]); + /* If our alpha happens to match src0_alpha, we can skip sending it, + * as the hardware will use our alpha in that case. + */ + if (scalars_equal(nir_scalar_resolved(intr->src[1].ssa, 0), + nir_scalar_resolved(intr->src[0].ssa, 3))) + src0_alpha = jay_null(); - for (unsigned i = 0; i < nir_src_num_components(intr->src[0]); ++i) { - srcs[i] = - trivial ? jay_UNDEF_u32(b) : jay_as_gpr(b, jay_extract(data, i)); + unsigned op = b->shader->dispatch_width == 32 ? + XE2_DATAPORT_RENDER_TARGET_WRITE_SIMD32_SINGLE_SOURCE : + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; + uint64_t desc = + brw_fb_write_desc(devinfo, target, op, last, false /* coarse write */); + + uint64_t ex_desc = (target << 21) | + (null_rt ? (1 << 20) : 0) | + (jay_is_null(src0_alpha) ? 0 : (1 << 15)) | + (jay_is_null(stencil) ? 0 : (1 << 14)) | + (jay_is_null(depth) ? 0 : (1 << 13)) | + (jay_is_null(omask) ? 0 : (1 << 12)); + + assert((jay_is_null(src0_alpha) || jay_is_null(omask)) && + "TODO: lower alpha test to discards when samplemask is written"); + + jay_def srcs[4 + 16 + 4 + 1 + 16]; + + unsigned len = 0; + + if (!jay_is_null(src0_alpha)) + srcs[len++] = jay_as_gpr(b, src0_alpha); + + assert(jay_is_null(omask) && "TODO: samplemask"); + + for (unsigned i = 0; i < 4; i++) + srcs[len++] = jay_as_gpr(b, jay_extract(colour, i)); + + if (!jay_is_null(depth)) + srcs[len++] = jay_as_gpr(b, depth); + + assert(jay_is_null(stencil) && "TODO: stencil"); + + /* Optimize out unconditional discards (probably should do this in NIR) */ + if (nir_src_is_const(intr->src[5]) && nir_src_as_bool(intr->src[5])) { + for (unsigned i = 0; i < len; i++) + srcs[i] = jay_UNDEF_u32(b); } jay_inst *send = jay_SEND(b, .sfid = BRW_SFID_RENDER_CACHE, .check_tdr = true, - .msg_desc = nir_scalar_as_uint(nir_scalar_chase_movs( - nir_get_scalar(intr->src[1].ssa, 0))) | - (nir_scalar_as_uint(nir_scalar_chase_movs( - nir_get_scalar(intr->src[1].ssa, 1))) - << 32), - .srcs = srcs, .nr_srcs = nir_src_num_components(intr->src[0]), - .type = JAY_TYPE_U32, .eot = nir_intrinsic_eot(intr)); + .msg_desc = desc | (ex_desc << 32), .srcs = srcs, .nr_srcs = len, + .type = JAY_TYPE_U32, .eot = last); /* Handle the disable predicate. It is logically inverted. */ - if (!nir_src_is_const(intr->src[2]) || nir_src_as_bool(intr->src[2])) { - jay_add_predicate(b, send, jay_negate(nj_src(intr->src[2]))); + if (!nir_src_is_const(intr->src[5]) || nir_src_as_bool(intr->src[5])) { + jay_add_predicate(b, send, jay_negate(nj_src(intr->src[5]))); } } diff --git a/src/intel/compiler/jay/jay_nir.c b/src/intel/compiler/jay/jay_nir.c index 1241d4f9c5b..dfaffb04a25 100644 --- a/src/intel/compiler/jay/jay_nir.c +++ b/src/intel/compiler/jay/jay_nir.c @@ -166,43 +166,19 @@ collect_fragment_output(nir_builder *b, nir_intrinsic_instr *intr, void *ctx_) nir_instr_remove(&intr->instr); return true; } - -static void -append_payload(nir_builder *b, - nir_def **payload, - unsigned *len, - unsigned max_len, - nir_def *value) -{ - if (value != NULL) { - for (unsigned i = 0; i < value->num_components; ++i) { - payload[*len] = nir_channel(b, value, i); - (*len)++; - assert((*len) <= max_len); - } - } -} - static void insert_rt_store(nir_builder *b, - const struct intel_device_info *devinfo, signed target, - bool last, nir_def *colour, - nir_def *src0_alpha, + nir_def *src0_colour, nir_def *depth, nir_def *stencil, - nir_def *sample_mask, - unsigned dispatch_width) + nir_def *sample_mask) { bool null_rt = target < 0; target = MAX2(target, 0); - if (!colour) { - colour = nir_undef(b, 4, 32); - } - - colour = nir_pad_vec4(b, colour); + colour = nir_pad_vec4(b, colour ?: nir_undef(b, 4, 32)); if (null_rt) { /* Even if we don't write a RT, we still need to write alpha for @@ -212,42 +188,14 @@ insert_rt_store(nir_builder *b, nir_channel(b, colour, 3), 3); } - /* TODO: Not sure I like this. We'll see what 2src looks like. */ - unsigned op = dispatch_width == 32 ? - XE2_DATAPORT_RENDER_TARGET_WRITE_SIMD32_SINGLE_SOURCE : - BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; - uint64_t desc = - brw_fb_write_desc(devinfo, target, op, last, false /* coarse write */); - - uint64_t ex_desc = 0; - if (devinfo->ver >= 20) { - ex_desc = target << 21 | - null_rt << 20 | - (src0_alpha ? (1 << 15) : 0) | - (stencil ? (1 << 14) : 0) | - (depth ? (1 << 13) : 0) | - (sample_mask ? (1 << 12) : 0); - } else if (devinfo->ver >= 11) { - /* Set the "Render Target Index" and "Src0 Alpha Present" fields - * in the extended message descriptor, in lieu of using a header. - */ - ex_desc = target << 12 | null_rt << 20 | (src0_alpha ? (1 << 15) : 0); - } - - /* Build the payload */ - nir_def *payload[8] = { NULL }; - unsigned len = 0; - append_payload(b, payload, &len, ARRAY_SIZE(payload), colour); - append_payload(b, payload, &len, ARRAY_SIZE(payload), depth); - /* TODO */ + nir_def *src0_alpha = nir_channel_or_undef(b, src0_colour ?: colour, 3); nir_def *disable = b->shader->info.fs.uses_discard ? nir_is_helper_invocation(b, 1) : nir_imm_false(b); - nir_store_render_target_intel(b, nir_vec(b, payload, len), - nir_imm_ivec2(b, desc, ex_desc), disable, - .eot = last); + nir_store_render_target_intel(b, colour, src0_alpha, sample_mask, depth, + stencil, disable, .target = target); } static void @@ -271,16 +219,18 @@ lower_fragment_outputs(nir_function_impl *impl, } } + nir_def *undef = nir_undef(b, 1, 32); for (signed i = 0; i < last; ++i) { if (ctx.colour[i]) { - insert_rt_store(b, devinfo, i, false, ctx.colour[i], NULL, ctx.depth, - ctx.stencil, ctx.sample_mask, dispatch_width); + insert_rt_store(b, i, ctx.colour[i], i > 0 ? ctx.colour[0] : NULL, + ctx.depth ?: undef, ctx.stencil ?: undef, + ctx.sample_mask ?: undef); } } - insert_rt_store(b, devinfo, last, true, last >= 0 ? ctx.colour[last] : NULL, - NULL, ctx.depth, ctx.stencil, ctx.sample_mask, - dispatch_width); + insert_rt_store(b, last, last >= 0 ? ctx.colour[last] : NULL, + last > 0 ? ctx.colour[0] : NULL, ctx.depth ?: undef, + ctx.stencil ?: undef, ctx.sample_mask ?: undef); } unsigned