diff --git a/src/intel/compiler/brw/brw_disasm.c b/src/intel/compiler/brw/brw_disasm.c
index 8de18bfc628..2b3c1b9c6fe 100644
--- a/src/intel/compiler/brw/brw_disasm.c
+++ b/src/intel/compiler/brw/brw_disasm.c
@@ -548,7 +548,10 @@ static const char *const xe2_sampler_msg_type[] = {
    [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B] = "gather4_b",
    [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C] = "gather4_i_c",
    [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C] = "gather4_l_c",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_I] = "gather4_po_i",
    [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L] = "gather4_po_l",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_I_C] = "gather4_po_i_c",
    [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L_C] = "gather4_po_l_c",
    [XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_B] = "gather4_po_b",
    [HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c",
@@ -559,6 +562,16 @@ static const char *const xe2_sampler_msg_type[] = {
    [GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS] = "ld_mcs",
    [GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS] = "ld2dms",
    [GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DSS] = "ld2dss",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_PO] = "sample_po",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_PO_BIAS] = "sample_po_b",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_PO_LOD] = "sample_po_l",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_PO_COMPARE] = "sample_po_c",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_PO_DERIVS] = "sample_po_d",
+   [XE3_SAMPLER_MESSAGE_SAMPLE_PO_BIAS_COMPARE] = "sample_po_b_c",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_PO_LOD_COMPARE] = "sample_po_l_c",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_PO_D_C] = "sample_po_d_c",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_PO_LZ] = "sample_po_lz",
+   [XE2_SAMPLER_MESSAGE_SAMPLE_PO_C_LZ] = "sample_po_c_lz",
 };
 
 static const char *const gfx5_sampler_simd_mode[7] = {
diff --git a/src/intel/compiler/brw/brw_eu_defines.h b/src/intel/compiler/brw/brw_eu_defines.h
index 86172b4dcc6..a7685d490d8 100644
--- a/src/intel/compiler/brw/brw_eu_defines.h
+++ b/src/intel/compiler/brw/brw_eu_defines.h
@@ -1210,9 +1210,23 @@ enum brw_sfid {
 #define GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29
 #define GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30
 #define GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31
+#define XE2_SAMPLER_MESSAGE_SAMPLE_PO 32
+#define XE2_SAMPLER_MESSAGE_SAMPLE_PO_BIAS 33
+#define XE2_SAMPLER_MESSAGE_SAMPLE_PO_LOD 34
+#define XE2_SAMPLER_MESSAGE_SAMPLE_PO_COMPARE 35
+#define XE2_SAMPLER_MESSAGE_SAMPLE_PO_DERIVS 36
+#define XE3_SAMPLER_MESSAGE_SAMPLE_PO_BIAS_COMPARE 37
+#define XE2_SAMPLER_MESSAGE_SAMPLE_PO_LOD_COMPARE 38
+#define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO 40
 #define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L 45
 #define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_B 46
+#define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_I 47
+#define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 48
+#define XE2_SAMPLER_MESSAGE_SAMPLE_PO_D_C 52
+#define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_I_C 53
 #define XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L_C 55
+#define XE2_SAMPLER_MESSAGE_SAMPLE_PO_LZ 56
+#define XE2_SAMPLER_MESSAGE_SAMPLE_PO_C_LZ 57
 
 /* for GFX5 only */
 #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp
index 3b277ac951c..a0d49737d2c 100644
--- a/src/intel/compiler/brw/brw_from_nir.cpp
+++ b/src/intel/compiler/brw/brw_from_nir.cpp
@@ -7438,6 +7438,9 @@ brw_from_nir_emit_texture(nir_to_brw_state &ntb,
          case P(BIAS_OFFUVR4):
          case P(LOD_OFFUV6):
          case P(LOD_OFFUVR4):
+         case P(OFFUV4_R):
+         case P(OFFUV6_R):
+         case P(OFFUVR4_R):
             /* There is no payload with 2 packed entries, so backend1 is always
              * the one payload parameter packed. */
             S(backend1, 0);
diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c
index bb37c70efb3..51cb8d87679 100644
--- a/src/intel/compiler/brw/brw_nir.c
+++ b/src/intel/compiler/brw/brw_nir.c
@@ -24,6 +24,7 @@
 #include "intel_nir.h"
 #include "brw_nir.h"
 #include "brw_private.h"
+#include "brw_sampler.h"
 #include "compiler/glsl_types.h"
 #include "compiler/nir/nir_builder.h"
 #include "dev/intel_debug.h"
@@ -2062,6 +2063,10 @@ lower_txd_cb(const nir_tex_instr *tex, const void *data)
       return true;
    }
 
+   if (tex->is_shadow && offset_index >= 0 &&
+       !brw_nir_tex_offset_in_constant_range(tex, offset_index))
+      return true;
+
    return false;
 }
 
diff --git a/src/intel/compiler/brw/brw_nir_lower_texture.c b/src/intel/compiler/brw/brw_nir_lower_texture.c
index dcbd4a21294..ebde8494c51 100644
--- a/src/intel/compiler/brw/brw_nir_lower_texture.c
+++ b/src/intel/compiler/brw/brw_nir_lower_texture.c
@@ -29,7 +29,10 @@
 /**
  * Takes care of lowering to target HW messages payload.
  *
- * For example, HW has no gather4_po_i_b so lower to gather_po_l.
+ * For example:
+ *    - HW has no gather4_po_i_b so lower to gather4_po_l.
+ *    - HW has no sample_po_b_c message, so lower the bias into the LOD to
+ *      switch to sample_po_c_l instead.
  */
 static bool
 pre_lower_texture_instr(nir_builder *b,
@@ -37,6 +40,30 @@ pre_lower_texture_instr(nir_builder *b,
                         void *data)
 {
    switch (tex->op) {
+   case nir_texop_txb: {
+      int bias_index = nir_tex_instr_src_index(tex, nir_tex_src_bias);
+      assert(bias_index != -1);
+      int comparator_index = nir_tex_instr_src_index(tex, nir_tex_src_comparator);
+      int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
+
+      if (comparator_index == -1 || offset_index == -1)
+         return false;
+
+      if (brw_nir_tex_offset_in_constant_range(tex, offset_index))
+         return false;
+
+      b->cursor = nir_before_instr(&tex->instr);
+
+      tex->op = nir_texop_txl;
+
+      nir_def *bias = tex->src[bias_index].src.ssa;
+      nir_tex_instr_remove_src(tex, bias_index);
+
+      nir_def *lod = nir_fadd(b, bias, nir_get_texture_lod(b, tex));
+      nir_tex_instr_add_src(tex, nir_tex_src_lod, lod);
+      return true;
+   }
+
    case nir_texop_tg4: {
       if (!tex->is_gather_implicit_lod)
          return false;
@@ -228,6 +255,79 @@ pack_lod_or_bias_and_offset(nir_builder *b, nir_tex_instr *tex,
    return true;
 }
 
+/**
+ * Replaces the offset source with a packed backend1 source: the packed
+ * offsets are shifted left by 12 bits and, for 3-component coordinates, the
+ * R coordinate (rounded to even, converted to unsigned and clamped to 0xfff)
+ * is OR'ed into the low bits and dropped from the coordinate vector.
+ *
+ * Returns false if the instruction has no offset source.
+ */
+static bool
+pack_offset_r(nir_builder *b, nir_tex_instr *tex,
+              unsigned offset_bits, unsigned offset_count)
+{
+   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
+   if (!offset)
+      return false;
+
+   const int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+   assert(coord_index >= 0);
+
+   b->cursor = nir_before_instr(&tex->instr);
+
+   nir_def *coord = tex->src[coord_index].src.ssa;
+
+   nir_def *offuvr = build_packed_offset(
+      b, offset, offset_bits, offset_count);
+
+   nir_def *packed = nir_ishl_imm(b, offuvr, 12);
+
+   assert(tex->coord_components != 4);
+   if (tex->coord_components == 3) {
+      nir_def *clamped_r =
+         nir_umin_imm(
+            b,
+            nir_f2u32(b, nir_fround_even(b, nir_channel(b, coord, 2))),
+            0xfff);
+
+      packed = nir_ior(b, packed, clamped_r);
+
+      nir_def *reduced_coord = nir_trim_vector(b, coord, 2);
+      tex->coord_components = 2;
+
+      nir_src_rewrite(&tex->src[coord_index].src, reduced_coord);
+   }
+
+   nir_tex_instr_add_src(tex, nir_tex_src_backend1, packed);
+
+   return true;
+}
+
+/**
+ * Replaces the offset source with its packed form, stored in a backend1
+ * source.
+ *
+ * Returns false if the instruction has no offset source.
+ */
+static bool
+pack_offset(nir_builder *b, nir_tex_instr *tex,
+            unsigned offset_bits, unsigned offset_count)
+{
+   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
+   if (!offset)
+      return false;
+
+   b->cursor = nir_before_instr(&tex->instr);
+
+   nir_def *offuvr = build_packed_offset(
+      b, offset, offset_bits, offset_count);
+
+   nir_tex_instr_add_src(tex, nir_tex_src_backend1, offuvr);
+
+   return true;
+}
+
 static bool
 brw_nir_lower_texture_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data)
 {
@@ -251,6 +351,34 @@ brw_nir_lower_texture_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data)
                                       BRW_SAMPLER_PAYLOAD_PARAM_LOD_OFFUVR4) != -1)
       return pack_lod_or_bias_and_offset(b, tex, 4, 3);
 
+   if (brw_sampler_opcode_param_index(sampler_opcode,
+                                      BRW_SAMPLER_PAYLOAD_PARAM_OFFUV4_R) != -1)
+      return pack_offset_r(b, tex, 4, 2);
+
+   if (brw_sampler_opcode_param_index(sampler_opcode,
+                                      BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR4_R) != -1)
+      return pack_offset_r(b, tex, 4, 3);
+
+   if (brw_sampler_opcode_param_index(sampler_opcode,
+                                      BRW_SAMPLER_PAYLOAD_PARAM_OFFUV6_R) != -1)
+      return pack_offset_r(b, tex, 6, 2);
+
+   if (brw_sampler_opcode_param_index(sampler_opcode,
+                                      BRW_SAMPLER_PAYLOAD_PARAM_OFFUV4) != -1)
+      return pack_offset(b, tex, 4, 2);
+
+   if (brw_sampler_opcode_param_index(sampler_opcode,
+                                      BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR4) != -1)
+      return pack_offset(b, tex, 4, 3);
+
+   if (brw_sampler_opcode_param_index(sampler_opcode,
+                                      BRW_SAMPLER_PAYLOAD_PARAM_OFFUV6) != -1)
+      return pack_offset(b, tex, 6, 2);
+
+   if (brw_sampler_opcode_param_index(sampler_opcode,
+                                      BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR6) != -1)
+      return pack_offset(b, tex, 6, 3);
+
    return false;
 }
 
diff --git a/src/intel/compiler/brw/brw_sampler.c b/src/intel/compiler/brw/brw_sampler.c
index da8338d80f8..46fc35d4282 100644
--- a/src/intel/compiler/brw/brw_sampler.c
+++ b/src/intel/compiler/brw/brw_sampler.c
@@ -57,6 +57,14 @@ static const opcode_filter_cb opcode_filters[BRW_SAMPLER_OPCODE_MAX] = {
    [BRW_SAMPLER_OPCODE_SAMPLE_L_C_PACKED] = gfx200_cube_array,
    [BRW_SAMPLER_OPCODE_SAMPLE_D_C] = not_gfx200_2darray,
    [BRW_SAMPLER_OPCODE_SAMPLE_D_C_PACKED] = gfx200_2darray,
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO] = gfx200,
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_B] = gfx200,
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_C] = gfx200,
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_C_L] = gfx200,
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_C_LZ] = gfx200,
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_D] = gfx200,
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_L] = gfx200,
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_LZ] = gfx200,
    [BRW_SAMPLER_OPCODE_GATHER4_B] = gfx200,
    [BRW_SAMPLER_OPCODE_GATHER4_I] = gfx200,
    [BRW_SAMPLER_OPCODE_GATHER4_L] = gfx200,
@@ -65,6 +73,8 @@ static const opcode_filter_cb opcode_filters[BRW_SAMPLER_OPCODE_MAX] = {
    [BRW_SAMPLER_OPCODE_GATHER4_PO_B] = gfx200,
    [BRW_SAMPLER_OPCODE_GATHER4_PO_C] = not_gfx200,
    [BRW_SAMPLER_OPCODE_GATHER4_PO_C_PACKED] = gfx200,
+   [BRW_SAMPLER_OPCODE_GATHER4_PO_I] = gfx200,
+   [BRW_SAMPLER_OPCODE_GATHER4_PO_I_C] = gfx200,
    [BRW_SAMPLER_OPCODE_GATHER4_PO_L] = gfx200,
    [BRW_SAMPLER_OPCODE_GATHER4_PO_L_C] = gfx200,
    [BRW_SAMPLER_OPCODE_LD2DMS_W] = not_gfx125,
@@ -271,6 +281,96 @@ static const struct sampler_opcode_desc {
          },
       },
    },
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO] = {
+      .name = "sample_po",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_PO,
+      .nir_src_mask = N(coord) | N(offset) | N(min_lod),
+      .has_offset_payload = true,
+      .payload = {
+         .sources = {
+            R(U), R(V), R(R), R(OFFUVR4), O(MLOD)
+         },
+      },
+   },
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_B] = {
+      .name = "sample_po_b",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_PO_BIAS,
+      .nir_src_mask = N(bias) | N(coord) | N(offset) | N(min_lod),
+      .has_offset_payload = true,
+      .payload = {
+         .sources = {
+            R(BIAS_OFFUVR4), R(U), O(V), O(R), O(MLOD),
+         },
+      },
+   },
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_C] = {
+      .name = "sample_po_c",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_PO_COMPARE,
+      .nir_src_mask = N(comparator) | N(coord) | N(offset) | N(min_lod),
+      .has_offset_payload = true,
+      .payload = {
+         .sources = {
+            R(REF), R(U), R(V), R(OFFUV4_R), O(MLOD),
+         },
+      },
+   },
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_C_LZ] = {
+      .name = "sample_po_c_lz",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_PO_C_LZ,
+      .nir_src_mask = N(comparator) | N(lod) | N(coord) | N(offset),
+      .has_offset_payload = true,
+      .lod_zero = true,
+      .payload = {
+         .sources = {
+            R(REF), R(U), R(V), R(OFFUV4_R),
+         },
+      },
+   },
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_C_L] = {
+      .name = "sample_po_c_l",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_PO_LOD_COMPARE,
+      .nir_src_mask = N(comparator) | N(lod) | N(coord) | N(offset),
+      .has_offset_payload = true,
+      .payload = {
+         .sources = {
+            R(REF), R(LOD_OFFUVR4), R(U), O(V), O(R)
+         },
+      },
+   },
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_D] = {
+      .name = "sample_po_d",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_PO_DERIVS,
+      .nir_src_mask = N(ddx) | N(ddy) | N(coord) | N(offset) | N(min_lod),
+      .has_offset_payload = true,
+      .payload = {
+         .sources = {
+            R(U), R(DUDX), R(DUDY), R(V), R(DVDX), R(DVDY), R(OFFUVR4_R), O(MLOD),
+         },
+      },
+   },
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_L] = {
+      .name = "sample_po_l",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_PO_LOD,
+      .nir_src_mask = N(lod) | N(coord) | N(offset),
+      .has_offset_payload = true,
+      .payload = {
+         .sources = {
+            R(LOD_OFFUVR4), R(U), O(V), O(R),
+         },
+      },
+   },
+   [BRW_SAMPLER_OPCODE_SAMPLE_PO_LZ] = {
+      .name = "sample_po_lz",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_PO_LZ,
+      .nir_src_mask = N(lod) | N(coord) | N(offset),
+      .has_offset_payload = true,
+      .lod_zero = true,
+      .payload = {
+         .sources = {
+            R(U), R(V), R(R), R(OFFUVR4),
+         },
+      },
+   },
    [BRW_SAMPLER_OPCODE_LD] = {
       .name = "ld",
       .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LD,
@@ -467,6 +567,36 @@ static const struct sampler_opcode_desc {
          },
       },
    },
+   [BRW_SAMPLER_OPCODE_GATHER4_PO_I] = {
+      .name = "gather4_po_i",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_I,
+      /* NOTE(review): the comparator source and REF payload slot match the
+       * gather4_po_i_c entry that follows; confirm that the non-compare
+       * message really takes a reference value.
+       */
+      .nir_src_mask = N(comparator) | N(coord) | N(offset),
+      .is_gather = true,
+      .is_gather_implicit_lod = true,
+      .has_offset_payload = true,
+      .payload = {
+         .sources = {
+            R(REF), R(U), R(V), R(R), R(OFFUV6),
+         },
+      },
+   },
+   [BRW_SAMPLER_OPCODE_GATHER4_PO_I_C] = {
+      .name = "gather4_po_i_c",
+      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_I_C,
+      .nir_src_mask = N(comparator) | N(coord) | N(offset),
+      .is_gather = true,
+      .is_gather_implicit_lod = true,
+      .has_offset_payload = true,
+      .payload = {
+         .sources = {
+            R(REF), R(U), R(V), R(OFFUV6_R),
+         },
+      },
+   },
    [BRW_SAMPLER_OPCODE_GATHER4_PO_L] = {
       .name = "gather4_po_l",
       .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L,
@@ -581,6 +711,9 @@ brw_sampler_payload_param_name(enum brw_sampler_payload_param param)
    case P(LOD_AI, "lod_ai");
    case P(LOD_OFFUV6, "lod_offuv6");
    case P(LOD_OFFUVR4, "lod_offuvr4");
+   case P(OFFUV4_R, "offuv4_r");
+   case P(OFFUV6_R, "offuv6_r");
+   case P(OFFUVR4_R, "offuvr4_r");
    case P(SI, "si");
    case P(SSI, "ssi");
    case P(MCS, "mcs");
diff --git a/src/intel/compiler/brw/brw_sampler.h b/src/intel/compiler/brw/brw_sampler.h
index b8c93e9c895..88e7fe81489 100644
--- a/src/intel/compiler/brw/brw_sampler.h
+++ b/src/intel/compiler/brw/brw_sampler.h
@@ -41,6 +41,9 @@ enum brw_sampler_payload_param {
    BRW_SAMPLER_PAYLOAD_PARAM_LOD_OFFUV6,
    BRW_SAMPLER_PAYLOAD_PARAM_LOD_OFFUVR4,
    BRW_SAMPLER_PAYLOAD_PARAM_MLOD_R,
+   BRW_SAMPLER_PAYLOAD_PARAM_OFFUV4_R,
+   BRW_SAMPLER_PAYLOAD_PARAM_OFFUV6_R,
+   BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR4_R,
    BRW_SAMPLER_PAYLOAD_PARAM_SI,
    BRW_SAMPLER_PAYLOAD_PARAM_SSI,
    BRW_SAMPLER_PAYLOAD_PARAM_MCS,
@@ -71,6 +74,14 @@ enum ENUM_PACKED brw_sampler_opcode {
    BRW_SAMPLER_OPCODE_SAMPLE_B_C_PACKED,
    BRW_SAMPLER_OPCODE_SAMPLE_L_C,
    BRW_SAMPLER_OPCODE_SAMPLE_L_C_PACKED,
+   BRW_SAMPLER_OPCODE_SAMPLE_PO,
+   BRW_SAMPLER_OPCODE_SAMPLE_PO_B,
+   BRW_SAMPLER_OPCODE_SAMPLE_PO_LZ,
+   BRW_SAMPLER_OPCODE_SAMPLE_PO_L,
+   BRW_SAMPLER_OPCODE_SAMPLE_PO_C_LZ,
+   BRW_SAMPLER_OPCODE_SAMPLE_PO_C,
+   BRW_SAMPLER_OPCODE_SAMPLE_PO_C_L,
+   BRW_SAMPLER_OPCODE_SAMPLE_PO_D,
    BRW_SAMPLER_OPCODE_LD_LZ,
    BRW_SAMPLER_OPCODE_LD,
    BRW_SAMPLER_OPCODE_LOD,
@@ -88,6 +99,8 @@ enum ENUM_PACKED brw_sampler_opcode {
    BRW_SAMPLER_OPCODE_GATHER4_PO_B,
    BRW_SAMPLER_OPCODE_GATHER4_PO_C,
    BRW_SAMPLER_OPCODE_GATHER4_PO_C_PACKED,
+   BRW_SAMPLER_OPCODE_GATHER4_PO_I,
+   BRW_SAMPLER_OPCODE_GATHER4_PO_I_C,
    BRW_SAMPLER_OPCODE_GATHER4_PO_L,
    BRW_SAMPLER_OPCODE_GATHER4_PO_L_C,
    BRW_SAMPLER_OPCODE_LD2DMS_W,