diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index d901dca6060..82605af5861 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -25,6 +25,7 @@ #include "brw_builder.h" #include "brw_nir.h" #include "brw_eu.h" +#include "brw_sampler.h" #include "nir.h" #include "nir_intrinsics.h" #include "nir_search_helpers.h" diff --git a/src/intel/compiler/brw/brw_sampler.c b/src/intel/compiler/brw/brw_sampler.c new file mode 100644 index 00000000000..da8338d80f8 --- /dev/null +++ b/src/intel/compiler/brw/brw_sampler.c @@ -0,0 +1,758 @@ +/* Copyright © 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "brw_sampler.h" +#include "brw_eu_defines.h" + +#define DEFINE_COND(name, condition) \ + static bool \ + name(const nir_tex_instr *tex, const struct intel_device_info *devinfo) \ + { \ + return (condition); \ + } \ + \ + static bool \ + not_##name(const nir_tex_instr *tex, const struct intel_device_info *devinfo) \ + { \ + return !(condition); \ + } \ + +DEFINE_COND(gfx200_cube_array, + devinfo->verx10 >= 200 && + tex->is_array && + tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE); +DEFINE_COND(gfx200_non_cube_array, + devinfo->verx10 >= 200 && + !(tex->is_array && + tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)); +DEFINE_COND(gfx125, devinfo->verx10 >= 125); +DEFINE_COND(gfx200, devinfo->verx10 >= 200); +DEFINE_COND(gfx200_2darray, + devinfo->verx10 >= 200 && + tex->is_array && + tex->sampler_dim == GLSL_SAMPLER_DIM_2D); + +/* This array defines the availability of a particular opcode for a given NIR + * tex instructions and platform. An entry without a callback is assumed to be + * available on all platforms. + * + * We might revisit at some point to have one of this table per generation to + * tune down the crazy. + */ +typedef bool (*opcode_filter_cb)(const nir_tex_instr *, const struct intel_device_info *); + +static const opcode_filter_cb opcode_filters[BRW_SAMPLER_OPCODE_MAX] = { + [BRW_SAMPLER_OPCODE_SAMPLE_B] = not_gfx200, + [BRW_SAMPLER_OPCODE_SAMPLE_B_REDUCED] = gfx200_non_cube_array, + [BRW_SAMPLER_OPCODE_SAMPLE_B_PACKED] = gfx200_cube_array, + [BRW_SAMPLER_OPCODE_SAMPLE_L] = not_gfx200_cube_array, + [BRW_SAMPLER_OPCODE_SAMPLE_L_REDUCED] = gfx200_non_cube_array, + [BRW_SAMPLER_OPCODE_SAMPLE_L_PACKED] = gfx200_cube_array, + [BRW_SAMPLER_OPCODE_SAMPLE_D] = not_gfx125, + [BRW_SAMPLER_OPCODE_SAMPLE_D_REDUCED] = gfx125, + [BRW_SAMPLER_OPCODE_SAMPLE_B_C] = not_gfx200_cube_array, + [BRW_SAMPLER_OPCODE_SAMPLE_B_C_PACKED] = gfx200_cube_array, + [BRW_SAMPLER_OPCODE_SAMPLE_L_C] = not_gfx200_cube_array, + [BRW_SAMPLER_OPCODE_SAMPLE_L_C_PACKED] = gfx200_cube_array, + [BRW_SAMPLER_OPCODE_SAMPLE_D_C] = not_gfx200_2darray, + [BRW_SAMPLER_OPCODE_SAMPLE_D_C_PACKED] = gfx200_2darray, + [BRW_SAMPLER_OPCODE_GATHER4_B] = gfx200, + [BRW_SAMPLER_OPCODE_GATHER4_I] = gfx200, + [BRW_SAMPLER_OPCODE_GATHER4_L] = gfx200, + [BRW_SAMPLER_OPCODE_GATHER4_PO] = not_gfx200, + [BRW_SAMPLER_OPCODE_GATHER4_PO_PACKED] = gfx200, + [BRW_SAMPLER_OPCODE_GATHER4_PO_B] = gfx200, + [BRW_SAMPLER_OPCODE_GATHER4_PO_C] = not_gfx200, + [BRW_SAMPLER_OPCODE_GATHER4_PO_C_PACKED] = gfx200, + [BRW_SAMPLER_OPCODE_GATHER4_PO_L] = gfx200, + [BRW_SAMPLER_OPCODE_GATHER4_PO_L_C] = gfx200, + [BRW_SAMPLER_OPCODE_LD2DMS_W] = not_gfx125, + [BRW_SAMPLER_OPCODE_LD2DMS_W_GFX125] = gfx125, +}; + +#define N(name) BITFIELD_BIT(nir_tex_src_##name) +#define R(name) { BRW_SAMPLER_PAYLOAD_PARAM_##name, false } +#define O(name) { BRW_SAMPLER_PAYLOAD_PARAM_##name, true } + +/* This array defines all the possible sampler payload formats. Quite a few + * entry end up being duplicated due to changes from generation to generation. + */ +static const struct sampler_opcode_desc { + const char *name; + uint32_t hw_opcode; + uint32_t nir_src_mask; + bool is_fetch:1; + bool is_gather:1; + bool lod_zero:1; + bool has_offset_payload:1; + bool is_gather_implicit_lod:1; + struct brw_sampler_payload_desc payload; +} sampler_opcode_descs[] = { + [BRW_SAMPLER_OPCODE_SAMPLE] = { + .name = "sample", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE, + .nir_src_mask = N(coord) | N(min_lod) | N(offset), + .payload = { + .sources = { + R(U), R(V), O(R), O(AI), O(MLOD), + }, + } + }, + [BRW_SAMPLER_OPCODE_SAMPLE_B] = { + .name = "sample_b", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS, + .nir_src_mask = N(coord) | N(bias) | N(min_lod) | N(offset), + .payload = { + .sources = { + R(BIAS), R(U), O(V), O(R), O(AI), O(MLOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_B_PACKED] = { + .name = "sample_b (packed)", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS, + .nir_src_mask = N(coord) | N(bias) | N(min_lod) | N(offset), + .payload = { + .sources = { + R(BIAS_AI), R(U), O(V), O(R), O(MLOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_B_REDUCED] = { + .name = "sample_b (reduced)", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS, + .nir_src_mask = N(coord) | N(bias) | N(min_lod) | N(offset), + .payload = { + .sources = { + R(BIAS), R(U), O(V), O(R), O(MLOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_L] = { + .name = "sample_l", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD, + .nir_src_mask = N(coord) | N(lod) | N(offset), + .payload = { + .sources = { + R(LOD), R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_L_PACKED] = { + .name = "sample_l (packed)", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD, + .nir_src_mask = N(coord) | N(lod) | N(offset), + .payload = { + .sources = { + R(LOD_AI), R(U), O(V), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_L_REDUCED] = { + .name = "sample_l (reduced)", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD, + .nir_src_mask = N(coord) | N(lod) | N(offset), + .payload = { + .sources = { + R(LOD), R(U), O(V), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_C] = { + .name = "sample_c", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_COMPARE, + .nir_src_mask = N(comparator) | N(coord) | N(min_lod) | N(offset), + .payload = { + .sources = { + R(REF), R(U), O(V), O(R), O(AI), O(MLOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_D] = { + .name = "sample_d", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS, + .nir_src_mask = N(coord) | N(min_lod) | N(ddx) | N(ddy) | N(offset), + .payload = { + .sources = { + R(U), R(DUDX), R(DUDY), O(V), O(DVDX), O(DVDY), O(R), O(DRDX), O(DRDY), O(AI), O(MLOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_D_REDUCED] = { + .name = "sample_d (reduced)", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS, + .nir_src_mask = N(coord) | N(min_lod) | N(ddx) | N(ddy) | N(offset), + .payload = { + .sources = { + R(U), R(DUDX), R(DUDY), O(V), O(DVDX), O(DVDY), O(R), O(MLOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_B_C] = { + .name = "sample_b_c", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE, + .nir_src_mask = N(comparator) | N(bias) | N(coord) | N(offset), + .payload = { + .sources = { + R(REF), R(BIAS), R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_B_C_PACKED] = { + .name = "sample_b_c (packed)", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE, + .nir_src_mask = N(comparator) | N(bias) | N(coord) | N(offset), + .payload = { + .sources = { + R(REF), R(BIAS_AI), R(U), O(V), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_D_C] = { + .name = "sample_d_c", + .hw_opcode = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE, + .nir_src_mask = N(comparator) | N(coord) | N(ddx) | N(ddy) | N(offset), + .payload = { + .sources = { + R(REF), R(U), R(DUDX), R(DUDY), O(V), O(DVDX), O(DVDY), O(R), O(DRDX), O(DRDY), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_D_C_PACKED] = { + .name = "sample_d_c (packed)", + .hw_opcode = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE, + .nir_src_mask = N(comparator) | N(coord) | N(ddx) | N(ddy) | N(offset), + .payload = { + .sources = { + R(REF), R(U), R(DUDX), R(DUDY), O(V), O(DVDX), O(DVDY), O(MLOD_R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_L_C] = { + .name = "sample_l_c", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE, + .nir_src_mask = N(comparator) | N(lod) | N(coord) | N(offset), + .payload = { + .sources = { + R(REF), R(LOD), R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_L_C_PACKED] = { + .name = "sample_l_c (packed)", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE, + .nir_src_mask = N(comparator) | N(lod) | N(coord) | N(offset), + .payload = { + .sources = { + R(REF), R(LOD_AI), R(U), O(V), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_LZ] = { + .name = "sample_lz", + .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_LZ, + .nir_src_mask = N(coord) | N(lod) | N(offset), + .lod_zero = true, + .payload = { + .sources = { + R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLE_C_LZ] = { + .name = "sample_c_lz", + .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ, + .nir_src_mask = N(comparator) | N(coord) | N(lod) | N(offset), + .lod_zero = true, + .payload = { + .sources = { + R(REF), R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_LD] = { + .name = "ld", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LD, + .nir_src_mask = N(lod) | N(coord) | N(offset), + .is_fetch = true, + .payload = { + .sources = { + R(U), O(V), R(LOD), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_LD_LZ] = { + .name = "ld_lz", + .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_LD_LZ, + .nir_src_mask = N(coord) | N(lod) | N(offset), + .lod_zero = true, + .is_fetch = true, + .payload = { + .sources = { + R(U), O(V), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_LOD] = { + .name = "lod", + .hw_opcode = GFX5_SAMPLER_MESSAGE_LOD, + .nir_src_mask = N(coord), + .payload = { + .sources = { + R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_RESINFO] = { + .name = "resinfo", + .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO, + .nir_src_mask = N(lod), + .payload = { + .sources = { + R(LOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_SAMPLEINFO] = { + .name = "sampleinfo", + .hw_opcode = GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO, + }, + [BRW_SAMPLER_OPCODE_GATHER4] = { + .name = "gather4", + .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4, + .nir_src_mask = N(coord) | N(offset), + .is_gather = true, + .payload = { + .sources = { + R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_B] = { + .name = "gather4_b", + .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B, + .nir_src_mask = N(bias) | N(coord) | N(offset), + .is_gather = true, + .payload = { + .sources = { + R(BIAS), R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_C] = { + .name = "gather4_c", + .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C, + .nir_src_mask = N(comparator) | N(coord) | N(offset), + .is_gather = true, + .payload = { + .sources = { + R(REF), R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_I] = { + .name = "gather4_i", + .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I, + .nir_src_mask = N(coord) | N(offset), + .is_gather = true, + .payload = { + .sources = { + R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_I_C] = { + .name = "gather4_i_c", + .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C, + .nir_src_mask = N(comparator) | N(coord) | N(offset), + .is_gather = true, + .payload = { + .sources = { + R(REF), R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_L] = { + .name = "gather4_l", + .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L, + .nir_src_mask = N(lod) | N(coord) | N(offset), + .is_gather = true, + .payload = { + .sources = { + R(LOD), R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_L_C] = { + .name = "gather4_l_c", + .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C, + .nir_src_mask = N(comparator) | N(lod) | N(coord) | N(offset), + .is_gather = true, + .payload = { + .sources = { + R(REF), R(LOD), R(U), O(V), O(R), O(AI), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_PO] = { + .name = "gather4_po", + .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO, + .nir_src_mask = N(coord) | N(offset), + .is_gather = true, + .has_offset_payload = true, + .payload = { + .sources = { + R(U), O(V), R(OFFU), O(OFFV), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_PO_PACKED] = { + .name = "gather4_po (packed)", + .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO, + .nir_src_mask = N(coord) | N(offset), + .is_gather = true, + .has_offset_payload = true, + .payload = { + .sources = { + R(U), O(V), R(OFFUV6), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_PO_B] = { + .name = "gather4_po_b", + .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_B, + .nir_src_mask = N(bias) | N(coord) | N(offset), + .is_gather = true, + .has_offset_payload = true, + .payload = { + .sources = { + R(BIAS_OFFUV6), R(U), O(V), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_PO_C] = { + .name = "gather4_po_c", + .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C, + .nir_src_mask = N(comparator) | N(coord) | N(offset), + .is_gather = true, + .has_offset_payload = true, + .payload = { + .sources = { + R(REF), R(U), O(V), R(OFFU), O(OFFV), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_PO_C_PACKED] = { + .name = "gather4_po_c (packed)", + .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C, + .nir_src_mask = N(comparator) | N(coord) | N(offset), + .is_gather = true, + .has_offset_payload = true, + .payload = { + .sources = { + R(REF), R(U), O(V), R(OFFUVR6), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_I] = { + .name = "gather4_i", + .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I, + .nir_src_mask = N(coord) | N(offset), + .is_gather = true, + .is_gather_implicit_lod = true, + .payload = { + .sources = { + R(U), O(V), O(R), O(AI) + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_PO_L] = { + .name = "gather4_po_l", + .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L, + .nir_src_mask = N(coord) | N(lod) | N(offset), + .is_gather = true, + .has_offset_payload = true, + .payload = { + .sources = { + R(LOD_OFFUV6), R(U), O(V), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_GATHER4_PO_L_C] = { + .name = "gather4_po_l_c", + .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L_C, + .nir_src_mask = N(comparator) | N(coord) | N(offset), + .is_gather = true, + .has_offset_payload = true, + .payload = { + .sources = { + R(REF), R(LOD_OFFUV6), R(U), O(V), O(R), + }, + }, + }, + [BRW_SAMPLER_OPCODE_LD2DMS_W] = { + .name = "ld2dms_w", + .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W, + .nir_src_mask = N(ms_index) | N(ms_mcs_intel) | N(coord) | N(lod), + .is_fetch = true, + .payload = { + .sources = { + R(SI), R(MCSL), R(MCSH), R(U), O(V), O(R), O(LOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_LD2DMS_W_GFX125] = { + .name = "ld2dms_w (gfx125)", + .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W, + .nir_src_mask = N(ms_index) | N(ms_mcs_intel) | N(coord) | N(lod), + .is_fetch = true, + .payload = { + .sources = { + R(SI), R(MCS0), R(MCS1), R(MCS2), R(MCS3), R(U), O(V), O(R), O(LOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_LD_MCS] = { + .name = "ld_mcs", + .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS, + .nir_src_mask = 0 /* internal */, + .is_fetch = true, + .payload = { + .sources = { + R(U), O(V), O(R), O(LOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_LD2DMS] = { + .name = "ld2dms", + .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS, + .nir_src_mask = 0 /* internal */, + .is_fetch = true, + .payload = { + .sources = { + R(SI), R(MCS), R(U), O(V), O(R), O(LOD), + }, + }, + }, + [BRW_SAMPLER_OPCODE_LD2DSS] = { + .name = "ld2dss", + .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DSS, + .nir_src_mask = 0 /* internal */, + .is_fetch = true, + .payload = { + .sources = { + R(SSI), R(U), O(V), O(R), O(LOD), + }, + }, + }, +}; + +#undef R +#undef O + +#define P(name, str) BRW_SAMPLER_PAYLOAD_PARAM_##name: return str + +const char * +brw_sampler_payload_param_name(enum brw_sampler_payload_param param) +{ + switch (param) { + case P(U, "u"); + case P(V, "v"); + case P(R, "r"); + case P(AI, "ai"); + case P(BIAS, "bias"); + case P(LOD, "lod"); + case P(MLOD, "mlod"); + case P(REF, "ref"); + case P(DUDX, "dudx"); + case P(DUDY, "dudy"); + case P(DVDX, "dvdx"); + case P(DVDY, "dvdy"); + case P(DRDX, "drdx"); + case P(DRDY, "drdy"); + case P(OFFU, "offu"); + case P(OFFV, "offv"); + case P(OFFUV6, "offuv6"); + case P(OFFUVR6, "offuvr6"); + case P(BIAS_AI, "bias_ai"); + case P(BIAS_OFFUV6, "bias_offuv6"); + case P(BIAS_OFFUVR4, "bias_offuvr4"); + case P(LOD_AI, "lod_ai"); + case P(LOD_OFFUV6, "lod_offuv6"); + case P(LOD_OFFUVR4, "lod_offuvr4"); + case P(SI, "si"); + case P(SSI, "ssi"); + case P(MCS, "mcs"); + case P(MCSL, "mcsl"); + case P(MCSH, "mcsh"); + case P(MCS0, "mcs0"); + case P(MCS1, "mcs1"); + case P(MCS2, "mcs2"); + case P(MCS3, "mcs3"); + default: UNREACHABLE("invalid param"); + } +} + +#undef P + +uint32_t +brw_get_sampler_hw_opcode(enum brw_sampler_opcode opcode) +{ + assert(opcode < ARRAY_SIZE(sampler_opcode_descs)); + return sampler_opcode_descs[opcode].hw_opcode; +} + +bool +brw_sampler_opcode_is_gather(enum brw_sampler_opcode opcode) +{ + assert(opcode < ARRAY_SIZE(sampler_opcode_descs)); + return sampler_opcode_descs[opcode].is_gather; +} + +const char * +brw_sampler_opcode_name(enum brw_sampler_opcode opcode) +{ + assert(opcode < ARRAY_SIZE(sampler_opcode_descs)); + return sampler_opcode_descs[opcode].name; +} + +const struct brw_sampler_payload_desc * +brw_get_sampler_payload_desc(enum brw_sampler_opcode opcode) +{ + assert(opcode < ARRAY_SIZE(sampler_opcode_descs)); + return &sampler_opcode_descs[opcode].payload; +} + +static uint32_t +opcode_sources(const struct sampler_opcode_desc *opcode) +{ + uint32_t count = 0; + while (opcode->payload.sources[count].param != BRW_SAMPLER_PAYLOAD_PARAM_INVALID) + count++; + return count; +} + +bool +brw_nir_tex_offset_in_constant_range(const nir_tex_instr *tex, + unsigned offset_index) +{ + assert(tex->src[offset_index].src_type == nir_tex_src_offset); + + if (!nir_src_is_const(tex->src[offset_index].src)) + return false; + + const unsigned num_components = + nir_tex_instr_src_size(tex, offset_index); + for (unsigned i = 0; i < num_components; i++) { + int offset = nir_src_comp_as_int(tex->src[offset_index].src, i); + if (offset < -8 || offset > 7) + return false; + } + + return true; +} + +enum brw_sampler_opcode +brw_get_sampler_opcode_from_tex(const struct intel_device_info *devinfo, + const nir_tex_instr *tex) +{ + /* Deal with some corner cases first */ + switch (tex->op) { + case nir_texop_lod: return BRW_SAMPLER_OPCODE_LOD; + case nir_texop_query_levels: return BRW_SAMPLER_OPCODE_RESINFO; + case nir_texop_texture_samples: return BRW_SAMPLER_OPCODE_SAMPLEINFO; + case nir_texop_txf_ms_mcs_intel: return BRW_SAMPLER_OPCODE_LD_MCS; + case nir_texop_txs: return BRW_SAMPLER_OPCODE_RESINFO; + default: break; + } + + const bool is_fetch = + tex->op == nir_texop_txf || + tex->op == nir_texop_txf_ms || + tex->op == nir_texop_txf_ms_fb || + tex->op == nir_texop_txf_ms_mcs_intel; + + const bool is_gather = tex->op == nir_texop_tg4; + + const int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod); + const bool lod_zero = + lod_index >= 0 && + nir_src_is_const(tex->src[lod_index].src) && + nir_src_as_const_value(tex->src[lod_index].src)->u32 == 0; + + /* We can stuff the offsets into the message header if they are in the + * encoding range [-8, 7]. Otherwise we need a payload slot for them. + */ + bool offset_non_constant_or_non_header_range = false; + const int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset); + if (offset_index >= 0) { + offset_non_constant_or_non_header_range = + !brw_nir_tex_offset_in_constant_range(tex, offset_index); + } + + uint32_t src_mask = 0; + for (uint32_t i = 0; i < tex->num_srcs; i++) + src_mask |= BITFIELD_BIT(tex->src[i].src_type); + + const uint32_t src_mask_ignore = + N(texture_deref) | + N(sampler_deref) | + N(texture_offset) | + N(sampler_offset) | + N(texture_handle) | + N(sampler_handle); + + src_mask &= ~src_mask_ignore; + +#if DEBUG_SAMPLER_SELECTION + fprintf(stderr, "NIR: "); + nir_print_instr(&tex->instr, stderr); + fprintf(stderr, "\n"); +#define SKIP_IF(name, cond) { \ + if (cond) { \ + fprintf(stderr, "%s: %s failed\n", \ + brw_sampler_opcode_name(i), name); \ + continue; \ + } \ + } +#else +#define SKIP_IF(name, cond) { if (cond) { continue; } } +#endif + + enum brw_sampler_opcode opcode_index = BRW_SAMPLER_OPCODE_MAX; + for (uint32_t i = 0; i < ARRAY_SIZE(sampler_opcode_descs); i++) { + SKIP_IF("generation requirement not met", + opcode_filters[i] != NULL && !opcode_filters[i](tex, devinfo)); + + SKIP_IF("non constant offsets", + offset_non_constant_or_non_header_range && + !sampler_opcode_descs[i].has_offset_payload); + + SKIP_IF("not fetch instruction", + is_fetch != sampler_opcode_descs[i].is_fetch); + + SKIP_IF("not gather instruction", + is_gather != sampler_opcode_descs[i].is_gather); + + SKIP_IF("not gather implicit lod", + tex->is_gather_implicit_lod != + sampler_opcode_descs[i].is_gather_implicit_lod); + + SKIP_IF("non lod zero", + !lod_zero && sampler_opcode_descs[i].lod_zero); + + SKIP_IF("non matching sources", + (sampler_opcode_descs[i].nir_src_mask & src_mask) != src_mask); + + opcode_index = i; +#if DEBUG_SAMPLER_SELECTION + fprintf(stderr, "selected %s\n", brw_sampler_opcode_name(opcode_index)); +#endif + break; + } + + assert(opcode_index < BRW_SAMPLER_OPCODE_MAX); + + return opcode_index; +} diff --git a/src/intel/compiler/brw/brw_sampler.h b/src/intel/compiler/brw/brw_sampler.h new file mode 100644 index 00000000000..b8c93e9c895 --- /dev/null +++ b/src/intel/compiler/brw/brw_sampler.h @@ -0,0 +1,152 @@ +/* Copyright © 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include "compiler/nir/nir.h" +#include "brw_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum brw_sampler_payload_param { + BRW_SAMPLER_PAYLOAD_PARAM_INVALID, + + BRW_SAMPLER_PAYLOAD_PARAM_U, + BRW_SAMPLER_PAYLOAD_PARAM_V, + BRW_SAMPLER_PAYLOAD_PARAM_R, + BRW_SAMPLER_PAYLOAD_PARAM_AI, + BRW_SAMPLER_PAYLOAD_PARAM_BIAS, + BRW_SAMPLER_PAYLOAD_PARAM_LOD, + BRW_SAMPLER_PAYLOAD_PARAM_MLOD, + BRW_SAMPLER_PAYLOAD_PARAM_REF, + BRW_SAMPLER_PAYLOAD_PARAM_DUDX, + BRW_SAMPLER_PAYLOAD_PARAM_DUDY, + BRW_SAMPLER_PAYLOAD_PARAM_DVDX, + BRW_SAMPLER_PAYLOAD_PARAM_DVDY, + BRW_SAMPLER_PAYLOAD_PARAM_DRDX, + BRW_SAMPLER_PAYLOAD_PARAM_DRDY, + BRW_SAMPLER_PAYLOAD_PARAM_OFFU, + BRW_SAMPLER_PAYLOAD_PARAM_OFFV, + BRW_SAMPLER_PAYLOAD_PARAM_OFFUV4, + BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR4, + BRW_SAMPLER_PAYLOAD_PARAM_OFFUV6, + BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR6, + BRW_SAMPLER_PAYLOAD_PARAM_BIAS_AI, + BRW_SAMPLER_PAYLOAD_PARAM_BIAS_OFFUV6, + BRW_SAMPLER_PAYLOAD_PARAM_BIAS_OFFUVR4, + BRW_SAMPLER_PAYLOAD_PARAM_LOD_AI, + BRW_SAMPLER_PAYLOAD_PARAM_LOD_OFFUV6, + BRW_SAMPLER_PAYLOAD_PARAM_LOD_OFFUVR4, + BRW_SAMPLER_PAYLOAD_PARAM_MLOD_R, + BRW_SAMPLER_PAYLOAD_PARAM_SI, + BRW_SAMPLER_PAYLOAD_PARAM_SSI, + BRW_SAMPLER_PAYLOAD_PARAM_MCS, + BRW_SAMPLER_PAYLOAD_PARAM_MCSL, + BRW_SAMPLER_PAYLOAD_PARAM_MCSH, + BRW_SAMPLER_PAYLOAD_PARAM_MCS0, + BRW_SAMPLER_PAYLOAD_PARAM_MCS1, + BRW_SAMPLER_PAYLOAD_PARAM_MCS2, + BRW_SAMPLER_PAYLOAD_PARAM_MCS3, +}; + +enum ENUM_PACKED brw_sampler_opcode { + BRW_SAMPLER_OPCODE_SAMPLE_LZ, + BRW_SAMPLER_OPCODE_SAMPLE, + BRW_SAMPLER_OPCODE_SAMPLE_B, + BRW_SAMPLER_OPCODE_SAMPLE_B_PACKED, + BRW_SAMPLER_OPCODE_SAMPLE_B_REDUCED, + BRW_SAMPLER_OPCODE_SAMPLE_C_LZ, + BRW_SAMPLER_OPCODE_SAMPLE_C, + BRW_SAMPLER_OPCODE_SAMPLE_D, + BRW_SAMPLER_OPCODE_SAMPLE_D_REDUCED, + BRW_SAMPLER_OPCODE_SAMPLE_D_C, + BRW_SAMPLER_OPCODE_SAMPLE_D_C_PACKED, + BRW_SAMPLER_OPCODE_SAMPLE_L, + BRW_SAMPLER_OPCODE_SAMPLE_L_PACKED, + BRW_SAMPLER_OPCODE_SAMPLE_L_REDUCED, + BRW_SAMPLER_OPCODE_SAMPLE_B_C, + BRW_SAMPLER_OPCODE_SAMPLE_B_C_PACKED, + BRW_SAMPLER_OPCODE_SAMPLE_L_C, + BRW_SAMPLER_OPCODE_SAMPLE_L_C_PACKED, + BRW_SAMPLER_OPCODE_LD_LZ, + BRW_SAMPLER_OPCODE_LD, + BRW_SAMPLER_OPCODE_LOD, + BRW_SAMPLER_OPCODE_RESINFO, + BRW_SAMPLER_OPCODE_SAMPLEINFO, + BRW_SAMPLER_OPCODE_GATHER4, + BRW_SAMPLER_OPCODE_GATHER4_B, + BRW_SAMPLER_OPCODE_GATHER4_C, + BRW_SAMPLER_OPCODE_GATHER4_I, + BRW_SAMPLER_OPCODE_GATHER4_I_C, + BRW_SAMPLER_OPCODE_GATHER4_L, + BRW_SAMPLER_OPCODE_GATHER4_L_C, + BRW_SAMPLER_OPCODE_GATHER4_PO, + BRW_SAMPLER_OPCODE_GATHER4_PO_PACKED, + BRW_SAMPLER_OPCODE_GATHER4_PO_B, + BRW_SAMPLER_OPCODE_GATHER4_PO_C, + BRW_SAMPLER_OPCODE_GATHER4_PO_C_PACKED, + BRW_SAMPLER_OPCODE_GATHER4_PO_L, + BRW_SAMPLER_OPCODE_GATHER4_PO_L_C, + BRW_SAMPLER_OPCODE_LD2DMS_W, + BRW_SAMPLER_OPCODE_LD2DMS_W_GFX125, + //BRW_SAMPLER_OPCODE_LD2DMS_W_GFX12, + BRW_SAMPLER_OPCODE_LD_MCS, + BRW_SAMPLER_OPCODE_LD2DMS, + BRW_SAMPLER_OPCODE_LD2DSS, + + BRW_SAMPLER_OPCODE_MAX, +}; + +struct brw_sampler_payload_src { + enum brw_sampler_payload_param param; + bool optional; +}; + +struct brw_sampler_payload_desc { + struct brw_sampler_payload_src sources[12]; +}; + +const char * +brw_sampler_payload_param_name(enum brw_sampler_payload_param param); + +const char * +brw_sampler_opcode_name(enum brw_sampler_opcode opcode); + +const struct brw_sampler_payload_desc * +brw_get_sampler_payload_desc(enum brw_sampler_opcode opcode); + +uint32_t +brw_get_sampler_hw_opcode(enum brw_sampler_opcode opcode); + +enum brw_sampler_opcode +brw_get_sampler_opcode_from_tex(const struct intel_device_info *devinfo, + const nir_tex_instr *tex); + +bool +brw_sampler_opcode_is_gather(enum brw_sampler_opcode opcode); + +static inline int +brw_sampler_opcode_param_index(enum brw_sampler_opcode opcode, + enum brw_sampler_payload_param param) +{ + const struct brw_sampler_payload_desc *desc = + brw_get_sampler_payload_desc(opcode); + + for (int i = 0; desc->sources[i].param != BRW_SAMPLER_PAYLOAD_PARAM_INVALID; i++) { + if (desc->sources[i].param == param) + return i; + } + + return -1; +} + +bool +brw_nir_tex_offset_in_constant_range(const nir_tex_instr *tex, + unsigned offset_index); + +#ifdef __cplusplus +} +#endif diff --git a/src/intel/compiler/brw/meson.build b/src/intel/compiler/brw/meson.build index 0af7e216c22..1d2de8be4c5 100644 --- a/src/intel/compiler/brw/meson.build +++ b/src/intel/compiler/brw/meson.build @@ -95,6 +95,8 @@ libintel_compiler_brw_files = files( 'brw_reg_type.c', 'brw_reg_type.h', 'brw_rt.h', + 'brw_sampler.c', + 'brw_sampler.h', 'brw_schedule_instructions.cpp', 'brw_shader.cpp', 'brw_shader.h',