brw: add a new sampler payload parameter description

Centralize all the information in one place and also make the mapping
decision from nir_tex_instr -> HW opcode much earlier.

This will help knowing exactly what the payload looks like early in
the backend IR and when it needs to be lowered to a smaller SIMD size
due to HW limits. It will also allow NIR lowering to know when to
combine parameters into a single packed component.

Finally, this also reduces the number of LOAD_PAYLOAD we need to carry
in the backend IR, because we don't have to generate VEC()
LOAD_PAYLOAD() for coordinates etc... Those are useless if there is
any other parameter in the payload and we need to add one more
LOAD_PAYLOAD() when doing the logical send lowering.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37171>
This commit is contained in:
Lionel Landwerlin 2025-08-26 19:25:49 +03:00 committed by Marge Bot
parent 28e73a6239
commit 7c77c4768a
4 changed files with 913 additions and 0 deletions

View file

@ -25,6 +25,7 @@
#include "brw_builder.h"
#include "brw_nir.h"
#include "brw_eu.h"
#include "brw_sampler.h"
#include "nir.h"
#include "nir_intrinsics.h"
#include "nir_search_helpers.h"

View file

@ -0,0 +1,758 @@
/* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "brw_sampler.h"
#include "brw_eu_defines.h"
/* Defines a pair of static predicates, name() and not_name(), which evaluate
 * `condition` and its negation with `tex` and `devinfo` in scope.
 *
 * The final line of the definition intentionally carries no line
 * continuation, so the macro body cannot absorb whatever line follows it.
 */
#define DEFINE_COND(name, condition)                                          \
   static bool                                                                \
   name(const nir_tex_instr *tex, const struct intel_device_info *devinfo)    \
   {                                                                          \
      return (condition);                                                     \
   }                                                                          \
                                                                              \
   static bool                                                                \
   not_##name(const nir_tex_instr *tex, const struct intel_device_info *devinfo) \
   {                                                                          \
      return !(condition);                                                    \
   }
/* Predicates referenced by opcode_filters[]. Every DEFINE_COND() below also
 * generates the negated not_<name>() variant.
 */

/* Pure platform checks. */
DEFINE_COND(gfx125, devinfo->verx10 >= 125);
DEFINE_COND(gfx200, devinfo->verx10 >= 200);

/* Platform checks combined with the shape of the sampled resource. */
DEFINE_COND(gfx200_cube_array,
            devinfo->verx10 >= 200 &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
            tex->is_array);
DEFINE_COND(gfx200_non_cube_array,
            devinfo->verx10 >= 200 &&
            (!tex->is_array ||
             tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE));
DEFINE_COND(gfx200_2darray,
            devinfo->verx10 >= 200 &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_2D &&
            tex->is_array);
/* This array defines the availability of a particular opcode for a given NIR
* tex instructions and platform. An entry without a callback is assumed to be
* available on all platforms.
*
* We might revisit at some point to have one of this table per generation to
* tune down the crazy.
*/
typedef bool (*opcode_filter_cb)(const nir_tex_instr *, const struct intel_device_info *);
static const opcode_filter_cb opcode_filters[BRW_SAMPLER_OPCODE_MAX] = {
[BRW_SAMPLER_OPCODE_SAMPLE_B] = not_gfx200,
[BRW_SAMPLER_OPCODE_SAMPLE_B_REDUCED] = gfx200_non_cube_array,
[BRW_SAMPLER_OPCODE_SAMPLE_B_PACKED] = gfx200_cube_array,
[BRW_SAMPLER_OPCODE_SAMPLE_L] = not_gfx200_cube_array,
[BRW_SAMPLER_OPCODE_SAMPLE_L_REDUCED] = gfx200_non_cube_array,
[BRW_SAMPLER_OPCODE_SAMPLE_L_PACKED] = gfx200_cube_array,
[BRW_SAMPLER_OPCODE_SAMPLE_D] = not_gfx125,
[BRW_SAMPLER_OPCODE_SAMPLE_D_REDUCED] = gfx125,
[BRW_SAMPLER_OPCODE_SAMPLE_B_C] = not_gfx200_cube_array,
[BRW_SAMPLER_OPCODE_SAMPLE_B_C_PACKED] = gfx200_cube_array,
[BRW_SAMPLER_OPCODE_SAMPLE_L_C] = not_gfx200_cube_array,
[BRW_SAMPLER_OPCODE_SAMPLE_L_C_PACKED] = gfx200_cube_array,
[BRW_SAMPLER_OPCODE_SAMPLE_D_C] = not_gfx200_2darray,
[BRW_SAMPLER_OPCODE_SAMPLE_D_C_PACKED] = gfx200_2darray,
[BRW_SAMPLER_OPCODE_GATHER4_B] = gfx200,
[BRW_SAMPLER_OPCODE_GATHER4_I] = gfx200,
[BRW_SAMPLER_OPCODE_GATHER4_L] = gfx200,
[BRW_SAMPLER_OPCODE_GATHER4_PO] = not_gfx200,
[BRW_SAMPLER_OPCODE_GATHER4_PO_PACKED] = gfx200,
[BRW_SAMPLER_OPCODE_GATHER4_PO_B] = gfx200,
[BRW_SAMPLER_OPCODE_GATHER4_PO_C] = not_gfx200,
[BRW_SAMPLER_OPCODE_GATHER4_PO_C_PACKED] = gfx200,
[BRW_SAMPLER_OPCODE_GATHER4_PO_L] = gfx200,
[BRW_SAMPLER_OPCODE_GATHER4_PO_L_C] = gfx200,
[BRW_SAMPLER_OPCODE_LD2DMS_W] = not_gfx125,
[BRW_SAMPLER_OPCODE_LD2DMS_W_GFX125] = gfx125,
};
/* N() builds a bit for a NIR tex source type; R()/O() build a required /
 * optional payload source entry.
 */
#define N(name) BITFIELD_BIT(nir_tex_src_##name)
#define R(name) { BRW_SAMPLER_PAYLOAD_PARAM_##name, false }
#define O(name) { BRW_SAMPLER_PAYLOAD_PARAM_##name, true }

/* This array defines all the possible sampler payload formats. Quite a few
 * entries end up being duplicated due to changes from generation to
 * generation.
 *
 * Fields:
 *  - nir_src_mask: NIR tex sources the opcode can consume; a tex instruction
 *    only matches when all of its (non-ignored) sources are in this mask.
 *  - is_fetch / is_gather / is_gather_implicit_lod / lod_zero /
 *    has_offset_payload: selection criteria checked by
 *    brw_get_sampler_opcode_from_tex().
 *  - payload: ordered list of payload sources; unused trailing slots are
 *    zero-initialized, i.e. BRW_SAMPLER_PAYLOAD_PARAM_INVALID.
 *
 * Each opcode must be initialized exactly once: a repeated designator
 * silently overrides the earlier entry.
 */
static const struct sampler_opcode_desc {
   const char *name;
   uint32_t hw_opcode;
   uint32_t nir_src_mask;
   bool is_fetch:1;
   bool is_gather:1;
   bool lod_zero:1;
   bool has_offset_payload:1;
   bool is_gather_implicit_lod:1;
   struct brw_sampler_payload_desc payload;
} sampler_opcode_descs[] = {
   [BRW_SAMPLER_OPCODE_SAMPLE] = {
      .name = "sample",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE,
      .nir_src_mask = N(coord) | N(min_lod) | N(offset),
      .payload = {
         .sources = {
            R(U), R(V), O(R), O(AI), O(MLOD),
         },
      }
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_B] = {
      .name = "sample_b",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS,
      .nir_src_mask = N(coord) | N(bias) | N(min_lod) | N(offset),
      .payload = {
         .sources = {
            R(BIAS), R(U), O(V), O(R), O(AI), O(MLOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_B_PACKED] = {
      .name = "sample_b (packed)",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS,
      .nir_src_mask = N(coord) | N(bias) | N(min_lod) | N(offset),
      .payload = {
         .sources = {
            R(BIAS_AI), R(U), O(V), O(R), O(MLOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_B_REDUCED] = {
      .name = "sample_b (reduced)",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS,
      .nir_src_mask = N(coord) | N(bias) | N(min_lod) | N(offset),
      .payload = {
         .sources = {
            R(BIAS), R(U), O(V), O(R), O(MLOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_L] = {
      .name = "sample_l",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD,
      .nir_src_mask = N(coord) | N(lod) | N(offset),
      .payload = {
         .sources = {
            R(LOD), R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_L_PACKED] = {
      .name = "sample_l (packed)",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD,
      .nir_src_mask = N(coord) | N(lod) | N(offset),
      .payload = {
         .sources = {
            R(LOD_AI), R(U), O(V), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_L_REDUCED] = {
      .name = "sample_l (reduced)",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD,
      .nir_src_mask = N(coord) | N(lod) | N(offset),
      .payload = {
         .sources = {
            R(LOD), R(U), O(V), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_C] = {
      .name = "sample_c",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_COMPARE,
      .nir_src_mask = N(comparator) | N(coord) | N(min_lod) | N(offset),
      .payload = {
         .sources = {
            R(REF), R(U), O(V), O(R), O(AI), O(MLOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_D] = {
      .name = "sample_d",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS,
      .nir_src_mask = N(coord) | N(min_lod) | N(ddx) | N(ddy) | N(offset),
      .payload = {
         .sources = {
            R(U), R(DUDX), R(DUDY), O(V), O(DVDX), O(DVDY), O(R), O(DRDX), O(DRDY), O(AI), O(MLOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_D_REDUCED] = {
      .name = "sample_d (reduced)",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS,
      .nir_src_mask = N(coord) | N(min_lod) | N(ddx) | N(ddy) | N(offset),
      .payload = {
         .sources = {
            R(U), R(DUDX), R(DUDY), O(V), O(DVDX), O(DVDY), O(R), O(MLOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_B_C] = {
      .name = "sample_b_c",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE,
      .nir_src_mask = N(comparator) | N(bias) | N(coord) | N(offset),
      .payload = {
         .sources = {
            R(REF), R(BIAS), R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_B_C_PACKED] = {
      .name = "sample_b_c (packed)",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE,
      .nir_src_mask = N(comparator) | N(bias) | N(coord) | N(offset),
      .payload = {
         .sources = {
            R(REF), R(BIAS_AI), R(U), O(V), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_D_C] = {
      .name = "sample_d_c",
      .hw_opcode = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE,
      .nir_src_mask = N(comparator) | N(coord) | N(ddx) | N(ddy) | N(offset),
      .payload = {
         .sources = {
            R(REF), R(U), R(DUDX), R(DUDY), O(V), O(DVDX), O(DVDY), O(R), O(DRDX), O(DRDY), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_D_C_PACKED] = {
      .name = "sample_d_c (packed)",
      .hw_opcode = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE,
      .nir_src_mask = N(comparator) | N(coord) | N(ddx) | N(ddy) | N(offset),
      .payload = {
         .sources = {
            R(REF), R(U), R(DUDX), R(DUDY), O(V), O(DVDX), O(DVDY), O(MLOD_R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_L_C] = {
      .name = "sample_l_c",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE,
      .nir_src_mask = N(comparator) | N(lod) | N(coord) | N(offset),
      .payload = {
         .sources = {
            R(REF), R(LOD), R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_L_C_PACKED] = {
      .name = "sample_l_c (packed)",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE,
      .nir_src_mask = N(comparator) | N(lod) | N(coord) | N(offset),
      .payload = {
         .sources = {
            R(REF), R(LOD_AI), R(U), O(V), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_LZ] = {
      .name = "sample_lz",
      .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_LZ,
      .nir_src_mask = N(coord) | N(lod) | N(offset),
      .lod_zero = true,
      .payload = {
         .sources = {
            R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLE_C_LZ] = {
      .name = "sample_c_lz",
      .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_C_LZ,
      .nir_src_mask = N(comparator) | N(coord) | N(lod) | N(offset),
      .lod_zero = true,
      .payload = {
         .sources = {
            R(REF), R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_LD] = {
      .name = "ld",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_LD,
      .nir_src_mask = N(lod) | N(coord) | N(offset),
      .is_fetch = true,
      .payload = {
         .sources = {
            R(U), O(V), R(LOD), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_LD_LZ] = {
      .name = "ld_lz",
      .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_LD_LZ,
      .nir_src_mask = N(coord) | N(lod) | N(offset),
      .lod_zero = true,
      .is_fetch = true,
      .payload = {
         .sources = {
            R(U), O(V), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_LOD] = {
      .name = "lod",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_LOD,
      .nir_src_mask = N(coord),
      .payload = {
         .sources = {
            R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_RESINFO] = {
      .name = "resinfo",
      .hw_opcode = GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
      .nir_src_mask = N(lod),
      .payload = {
         .sources = {
            R(LOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_SAMPLEINFO] = {
      .name = "sampleinfo",
      .hw_opcode = GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO,
   },
   [BRW_SAMPLER_OPCODE_GATHER4] = {
      .name = "gather4",
      .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4,
      .nir_src_mask = N(coord) | N(offset),
      .is_gather = true,
      .payload = {
         .sources = {
            R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_B] = {
      .name = "gather4_b",
      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B,
      .nir_src_mask = N(bias) | N(coord) | N(offset),
      .is_gather = true,
      .payload = {
         .sources = {
            R(BIAS), R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_C] = {
      .name = "gather4_c",
      .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C,
      .nir_src_mask = N(comparator) | N(coord) | N(offset),
      .is_gather = true,
      .payload = {
         .sources = {
            R(REF), R(U), O(V), O(R), O(AI),
         },
      },
   },
   /* Single definition of GATHER4_I. This opcode used to be initialized
    * twice in this table; the values here are those of the later initializer,
    * which is the one that took effect.
    */
   [BRW_SAMPLER_OPCODE_GATHER4_I] = {
      .name = "gather4_i",
      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I,
      .nir_src_mask = N(coord) | N(offset),
      .is_gather = true,
      .is_gather_implicit_lod = true,
      .payload = {
         .sources = {
            R(U), O(V), O(R), O(AI),
         },
      },
   },
   /* NOTE(review): this entry has the same selection criteria as GATHER4_C,
    * which comes earlier in enum brw_sampler_opcode, so the first-match
    * selection loop never picks it. It presumably wants
    * .is_gather_implicit_lod = true like GATHER4_I -- confirm.
    */
   [BRW_SAMPLER_OPCODE_GATHER4_I_C] = {
      .name = "gather4_i_c",
      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C,
      .nir_src_mask = N(comparator) | N(coord) | N(offset),
      .is_gather = true,
      .payload = {
         .sources = {
            R(REF), R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_L] = {
      .name = "gather4_l",
      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L,
      .nir_src_mask = N(lod) | N(coord) | N(offset),
      .is_gather = true,
      .payload = {
         .sources = {
            R(LOD), R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_L_C] = {
      .name = "gather4_l_c",
      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C,
      .nir_src_mask = N(comparator) | N(lod) | N(coord) | N(offset),
      .is_gather = true,
      .payload = {
         .sources = {
            R(REF), R(LOD), R(U), O(V), O(R), O(AI),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_PO] = {
      .name = "gather4_po",
      .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO,
      .nir_src_mask = N(coord) | N(offset),
      .is_gather = true,
      .has_offset_payload = true,
      .payload = {
         .sources = {
            R(U), O(V), R(OFFU), O(OFFV), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_PO_PACKED] = {
      .name = "gather4_po (packed)",
      .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO,
      .nir_src_mask = N(coord) | N(offset),
      .is_gather = true,
      .has_offset_payload = true,
      .payload = {
         .sources = {
            R(U), O(V), R(OFFUV6), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_PO_B] = {
      .name = "gather4_po_b",
      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_B,
      .nir_src_mask = N(bias) | N(coord) | N(offset),
      .is_gather = true,
      .has_offset_payload = true,
      .payload = {
         .sources = {
            R(BIAS_OFFUV6), R(U), O(V), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_PO_C] = {
      .name = "gather4_po_c",
      .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C,
      .nir_src_mask = N(comparator) | N(coord) | N(offset),
      .is_gather = true,
      .has_offset_payload = true,
      .payload = {
         .sources = {
            R(REF), R(U), O(V), R(OFFU), O(OFFV), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_PO_C_PACKED] = {
      .name = "gather4_po_c (packed)",
      .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C,
      .nir_src_mask = N(comparator) | N(coord) | N(offset),
      .is_gather = true,
      .has_offset_payload = true,
      .payload = {
         .sources = {
            R(REF), R(U), O(V), R(OFFUVR6),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_PO_L] = {
      .name = "gather4_po_l",
      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L,
      .nir_src_mask = N(coord) | N(lod) | N(offset),
      .is_gather = true,
      .has_offset_payload = true,
      .payload = {
         .sources = {
            R(LOD_OFFUV6), R(U), O(V), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_GATHER4_PO_L_C] = {
      .name = "gather4_po_l_c",
      .hw_opcode = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L_C,
      /* The payload requires LOD_OFFUV6, so the lod source has to be
       * accepted here, as it is for gather4_po_l and gather4_l_c.
       */
      .nir_src_mask = N(comparator) | N(coord) | N(lod) | N(offset),
      .is_gather = true,
      .has_offset_payload = true,
      .payload = {
         .sources = {
            R(REF), R(LOD_OFFUV6), R(U), O(V), O(R),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_LD2DMS_W] = {
      .name = "ld2dms_w",
      .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W,
      .nir_src_mask = N(ms_index) | N(ms_mcs_intel) | N(coord) | N(lod),
      .is_fetch = true,
      .payload = {
         .sources = {
            R(SI), R(MCSL), R(MCSH), R(U), O(V), O(R), O(LOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_LD2DMS_W_GFX125] = {
      .name = "ld2dms_w (gfx125)",
      .hw_opcode = GFX9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W,
      .nir_src_mask = N(ms_index) | N(ms_mcs_intel) | N(coord) | N(lod),
      .is_fetch = true,
      .payload = {
         .sources = {
            R(SI), R(MCS0), R(MCS1), R(MCS2), R(MCS3), R(U), O(V), O(R), O(LOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_LD_MCS] = {
      .name = "ld_mcs",
      .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_LD_MCS,
      .nir_src_mask = 0 /* internal */,
      .is_fetch = true,
      .payload = {
         .sources = {
            R(U), O(V), O(R), O(LOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_LD2DMS] = {
      .name = "ld2dms",
      .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DMS,
      .nir_src_mask = 0 /* internal */,
      .is_fetch = true,
      .payload = {
         .sources = {
            R(SI), R(MCS), R(U), O(V), O(R), O(LOD),
         },
      },
   },
   [BRW_SAMPLER_OPCODE_LD2DSS] = {
      .name = "ld2dss",
      .hw_opcode = GFX7_SAMPLER_MESSAGE_SAMPLE_LD2DSS,
      .nir_src_mask = 0 /* internal */,
      .is_fetch = true,
      .payload = {
         .sources = {
            R(SSI), R(U), O(V), O(R), O(LOD),
         },
      },
   },
};
#undef R
#undef O
#define P(name, str) BRW_SAMPLER_PAYLOAD_PARAM_##name: return str
const char *
brw_sampler_payload_param_name(enum brw_sampler_payload_param param)
{
switch (param) {
case P(U, "u");
case P(V, "v");
case P(R, "r");
case P(AI, "ai");
case P(BIAS, "bias");
case P(LOD, "lod");
case P(MLOD, "mlod");
case P(REF, "ref");
case P(DUDX, "dudx");
case P(DUDY, "dudy");
case P(DVDX, "dvdx");
case P(DVDY, "dvdy");
case P(DRDX, "drdx");
case P(DRDY, "drdy");
case P(OFFU, "offu");
case P(OFFV, "offv");
case P(OFFUV6, "offuv6");
case P(OFFUVR6, "offuvr6");
case P(BIAS_AI, "bias_ai");
case P(BIAS_OFFUV6, "bias_offuv6");
case P(BIAS_OFFUVR4, "bias_offuvr4");
case P(LOD_AI, "lod_ai");
case P(LOD_OFFUV6, "lod_offuv6");
case P(LOD_OFFUVR4, "lod_offuvr4");
case P(SI, "si");
case P(SSI, "ssi");
case P(MCS, "mcs");
case P(MCSL, "mcsl");
case P(MCSH, "mcsh");
case P(MCS0, "mcs0");
case P(MCS1, "mcs1");
case P(MCS2, "mcs2");
case P(MCS3, "mcs3");
default: UNREACHABLE("invalid param");
}
}
#undef P
uint32_t
brw_get_sampler_hw_opcode(enum brw_sampler_opcode opcode)
{
assert(opcode < ARRAY_SIZE(sampler_opcode_descs));
return sampler_opcode_descs[opcode].hw_opcode;
}
bool
brw_sampler_opcode_is_gather(enum brw_sampler_opcode opcode)
{
assert(opcode < ARRAY_SIZE(sampler_opcode_descs));
return sampler_opcode_descs[opcode].is_gather;
}
const char *
brw_sampler_opcode_name(enum brw_sampler_opcode opcode)
{
assert(opcode < ARRAY_SIZE(sampler_opcode_descs));
return sampler_opcode_descs[opcode].name;
}
const struct brw_sampler_payload_desc *
brw_get_sampler_payload_desc(enum brw_sampler_opcode opcode)
{
assert(opcode < ARRAY_SIZE(sampler_opcode_descs));
return &sampler_opcode_descs[opcode].payload;
}
/* Counts the payload sources of an opcode descriptor, i.e. the number of
 * entries preceding the first BRW_SAMPLER_PAYLOAD_PARAM_INVALID slot.
 *
 * The scan is bounded by the size of the sources[] array so that a payload
 * filling every slot (and therefore having no terminator) cannot make it read
 * past the end of the array.
 */
static uint32_t
opcode_sources(const struct sampler_opcode_desc *opcode)
{
   uint32_t count = 0;

   while (count < ARRAY_SIZE(opcode->payload.sources) &&
          opcode->payload.sources[count].param != BRW_SAMPLER_PAYLOAD_PARAM_INVALID)
      count++;

   return count;
}
/* Checks whether the offset source at `offset_index` is a compile-time
 * constant whose components all lie within [-8, 7].
 *
 * `offset_index` must designate a nir_tex_src_offset source of `tex`.
 * Returns false for a non-constant offset or as soon as one component falls
 * outside the range.
 */
bool
brw_nir_tex_offset_in_constant_range(const nir_tex_instr *tex,
                                     unsigned offset_index)
{
   assert(tex->src[offset_index].src_type == nir_tex_src_offset);

   if (!nir_src_is_const(tex->src[offset_index].src))
      return false;

   const unsigned comp_count = nir_tex_instr_src_size(tex, offset_index);
   unsigned comp = 0;
   while (comp < comp_count) {
      const int value = nir_src_comp_as_int(tex->src[offset_index].src, comp);
      const bool fits = value >= -8 && value <= 7;
      if (!fits)
         return false;
      comp++;
   }

   return true;
}
/* Maps a NIR texture instruction to a sampler opcode for the given device.
 *
 * A few texture ops map to a fixed opcode. For everything else the opcode
 * descriptors are scanned in ascending enum order and the first one that
 * passes every criterion is returned:
 *  - its platform/instruction filter (if any) accepts the instruction,
 *  - it carries offsets in the payload when the offsets cannot be encoded in
 *    the message header,
 *  - its fetch / gather / gather-implicit-lod flags equal the instruction's,
 *  - it is not a "lod zero" variant unless the lod is a constant 0,
 *  - its nir_src_mask covers all of the instruction's sources (texture and
 *    sampler deref/offset/handle sources are ignored).
 *
 * Asserts that a match was found.
 */
enum brw_sampler_opcode
brw_get_sampler_opcode_from_tex(const struct intel_device_info *devinfo,
                                const nir_tex_instr *tex)
{
   /* Deal with some corner cases first */
   switch (tex->op) {
   case nir_texop_lod:              return BRW_SAMPLER_OPCODE_LOD;
   case nir_texop_query_levels:     return BRW_SAMPLER_OPCODE_RESINFO;
   case nir_texop_texture_samples:  return BRW_SAMPLER_OPCODE_SAMPLEINFO;
   case nir_texop_txf_ms_mcs_intel: return BRW_SAMPLER_OPCODE_LD_MCS;
   case nir_texop_txs:              return BRW_SAMPLER_OPCODE_RESINFO;
   default: break;
   }

   /* nir_texop_txf_ms_mcs_intel has already returned above, so it does not
    * need to be listed here.
    */
   const bool is_fetch =
      tex->op == nir_texop_txf ||
      tex->op == nir_texop_txf_ms ||
      tex->op == nir_texop_txf_ms_fb;
   const bool is_gather = tex->op == nir_texop_tg4;

   const int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   const bool lod_zero =
      lod_index >= 0 &&
      nir_src_is_const(tex->src[lod_index].src) &&
      nir_src_as_const_value(tex->src[lod_index].src)->u32 == 0;

   /* We can stuff the offsets into the message header if they are in the
    * encoding range [-8, 7]. Otherwise we need a payload slot for them.
    */
   bool offset_non_constant_or_non_header_range = false;
   const int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index >= 0) {
      offset_non_constant_or_non_header_range =
         !brw_nir_tex_offset_in_constant_range(tex, offset_index);
   }

   /* Collect the source types present on the instruction, minus those that
    * only identify the texture/sampler.
    */
   uint32_t src_mask = 0;
   for (uint32_t i = 0; i < tex->num_srcs; i++)
      src_mask |= BITFIELD_BIT(tex->src[i].src_type);

   const uint32_t src_mask_ignore =
      N(texture_deref) |
      N(sampler_deref) |
      N(texture_offset) |
      N(sampler_offset) |
      N(texture_handle) |
      N(sampler_handle);
   src_mask &= ~src_mask_ignore;

   /* SKIP_IF() expands to a plain braced block rather than do { } while (0):
    * the `continue` inside it has to apply to the opcode loop below.
    */
#if DEBUG_SAMPLER_SELECTION
   fprintf(stderr, "NIR: ");
   nir_print_instr(&tex->instr, stderr);
   fprintf(stderr, "\n");
#define SKIP_IF(name, cond) {                           \
      if (cond) {                                       \
         fprintf(stderr, "%s: %s failed\n",             \
                 brw_sampler_opcode_name(i), name);     \
         continue;                                      \
      }                                                 \
   }
#else
#define SKIP_IF(name, cond) { if (cond) { continue; } }
#endif

   enum brw_sampler_opcode opcode_index = BRW_SAMPLER_OPCODE_MAX;
   for (uint32_t i = 0; i < ARRAY_SIZE(sampler_opcode_descs); i++) {
      SKIP_IF("generation requirement not met",
              opcode_filters[i] != NULL && !opcode_filters[i](tex, devinfo));
      SKIP_IF("non constant offsets",
              offset_non_constant_or_non_header_range &&
              !sampler_opcode_descs[i].has_offset_payload);
      SKIP_IF("not fetch instruction",
              is_fetch != sampler_opcode_descs[i].is_fetch);
      SKIP_IF("not gather instruction",
              is_gather != sampler_opcode_descs[i].is_gather);
      SKIP_IF("not gather implicit lod",
              tex->is_gather_implicit_lod !=
              sampler_opcode_descs[i].is_gather_implicit_lod);
      SKIP_IF("non lod zero",
              !lod_zero && sampler_opcode_descs[i].lod_zero);
      SKIP_IF("non matching sources",
              (sampler_opcode_descs[i].nir_src_mask & src_mask) != src_mask);

      opcode_index = i;
#if DEBUG_SAMPLER_SELECTION
      fprintf(stderr, "selected %s\n", brw_sampler_opcode_name(opcode_index));
#endif
      break;
   }
   /* The helper macro is only meaningful inside the loop above. */
#undef SKIP_IF

   assert(opcode_index < BRW_SAMPLER_OPCODE_MAX);
   return opcode_index;
}

View file

@ -0,0 +1,152 @@
/* Copyright © 2025 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include "compiler/nir/nir.h"
#include "brw_compiler.h"
#ifdef __cplusplus
extern "C" {
#endif
enum brw_sampler_payload_param {
BRW_SAMPLER_PAYLOAD_PARAM_INVALID,
BRW_SAMPLER_PAYLOAD_PARAM_U,
BRW_SAMPLER_PAYLOAD_PARAM_V,
BRW_SAMPLER_PAYLOAD_PARAM_R,
BRW_SAMPLER_PAYLOAD_PARAM_AI,
BRW_SAMPLER_PAYLOAD_PARAM_BIAS,
BRW_SAMPLER_PAYLOAD_PARAM_LOD,
BRW_SAMPLER_PAYLOAD_PARAM_MLOD,
BRW_SAMPLER_PAYLOAD_PARAM_REF,
BRW_SAMPLER_PAYLOAD_PARAM_DUDX,
BRW_SAMPLER_PAYLOAD_PARAM_DUDY,
BRW_SAMPLER_PAYLOAD_PARAM_DVDX,
BRW_SAMPLER_PAYLOAD_PARAM_DVDY,
BRW_SAMPLER_PAYLOAD_PARAM_DRDX,
BRW_SAMPLER_PAYLOAD_PARAM_DRDY,
BRW_SAMPLER_PAYLOAD_PARAM_OFFU,
BRW_SAMPLER_PAYLOAD_PARAM_OFFV,
BRW_SAMPLER_PAYLOAD_PARAM_OFFUV4,
BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR4,
BRW_SAMPLER_PAYLOAD_PARAM_OFFUV6,
BRW_SAMPLER_PAYLOAD_PARAM_OFFUVR6,
BRW_SAMPLER_PAYLOAD_PARAM_BIAS_AI,
BRW_SAMPLER_PAYLOAD_PARAM_BIAS_OFFUV6,
BRW_SAMPLER_PAYLOAD_PARAM_BIAS_OFFUVR4,
BRW_SAMPLER_PAYLOAD_PARAM_LOD_AI,
BRW_SAMPLER_PAYLOAD_PARAM_LOD_OFFUV6,
BRW_SAMPLER_PAYLOAD_PARAM_LOD_OFFUVR4,
BRW_SAMPLER_PAYLOAD_PARAM_MLOD_R,
BRW_SAMPLER_PAYLOAD_PARAM_SI,
BRW_SAMPLER_PAYLOAD_PARAM_SSI,
BRW_SAMPLER_PAYLOAD_PARAM_MCS,
BRW_SAMPLER_PAYLOAD_PARAM_MCSL,
BRW_SAMPLER_PAYLOAD_PARAM_MCSH,
BRW_SAMPLER_PAYLOAD_PARAM_MCS0,
BRW_SAMPLER_PAYLOAD_PARAM_MCS1,
BRW_SAMPLER_PAYLOAD_PARAM_MCS2,
BRW_SAMPLER_PAYLOAD_PARAM_MCS3,
};
/* Sampler opcodes as seen by the backend; each one indexes the opcode
 * descriptor and filter tables in brw_sampler.c.
 *
 * The order of the enumerators is significant:
 * brw_get_sampler_opcode_from_tex() walks the opcodes in ascending order and
 * returns the first one that matches, so an earlier enumerator takes
 * priority over a later one with overlapping criteria (e.g. the *_LZ
 * variants are listed before their general forms).
 */
enum ENUM_PACKED brw_sampler_opcode {
BRW_SAMPLER_OPCODE_SAMPLE_LZ,
BRW_SAMPLER_OPCODE_SAMPLE,
BRW_SAMPLER_OPCODE_SAMPLE_B,
BRW_SAMPLER_OPCODE_SAMPLE_B_PACKED,
BRW_SAMPLER_OPCODE_SAMPLE_B_REDUCED,
BRW_SAMPLER_OPCODE_SAMPLE_C_LZ,
BRW_SAMPLER_OPCODE_SAMPLE_C,
BRW_SAMPLER_OPCODE_SAMPLE_D,
BRW_SAMPLER_OPCODE_SAMPLE_D_REDUCED,
BRW_SAMPLER_OPCODE_SAMPLE_D_C,
BRW_SAMPLER_OPCODE_SAMPLE_D_C_PACKED,
BRW_SAMPLER_OPCODE_SAMPLE_L,
BRW_SAMPLER_OPCODE_SAMPLE_L_PACKED,
BRW_SAMPLER_OPCODE_SAMPLE_L_REDUCED,
BRW_SAMPLER_OPCODE_SAMPLE_B_C,
BRW_SAMPLER_OPCODE_SAMPLE_B_C_PACKED,
BRW_SAMPLER_OPCODE_SAMPLE_L_C,
BRW_SAMPLER_OPCODE_SAMPLE_L_C_PACKED,
BRW_SAMPLER_OPCODE_LD_LZ,
BRW_SAMPLER_OPCODE_LD,
BRW_SAMPLER_OPCODE_LOD,
BRW_SAMPLER_OPCODE_RESINFO,
BRW_SAMPLER_OPCODE_SAMPLEINFO,
BRW_SAMPLER_OPCODE_GATHER4,
BRW_SAMPLER_OPCODE_GATHER4_B,
BRW_SAMPLER_OPCODE_GATHER4_C,
BRW_SAMPLER_OPCODE_GATHER4_I,
BRW_SAMPLER_OPCODE_GATHER4_I_C,
BRW_SAMPLER_OPCODE_GATHER4_L,
BRW_SAMPLER_OPCODE_GATHER4_L_C,
BRW_SAMPLER_OPCODE_GATHER4_PO,
BRW_SAMPLER_OPCODE_GATHER4_PO_PACKED,
BRW_SAMPLER_OPCODE_GATHER4_PO_B,
BRW_SAMPLER_OPCODE_GATHER4_PO_C,
BRW_SAMPLER_OPCODE_GATHER4_PO_C_PACKED,
BRW_SAMPLER_OPCODE_GATHER4_PO_L,
BRW_SAMPLER_OPCODE_GATHER4_PO_L_C,
BRW_SAMPLER_OPCODE_LD2DMS_W,
BRW_SAMPLER_OPCODE_LD2DMS_W_GFX125,
//BRW_SAMPLER_OPCODE_LD2DMS_W_GFX12,
/* The remaining opcodes are marked "internal" in the descriptor table: their
 * NIR source mask is 0.
 */
BRW_SAMPLER_OPCODE_LD_MCS,
BRW_SAMPLER_OPCODE_LD2DMS,
BRW_SAMPLER_OPCODE_LD2DSS,
/* Number of opcodes; sizes the filter table and doubles as the "no match"
 * value inside brw_get_sampler_opcode_from_tex().
 */
BRW_SAMPLER_OPCODE_MAX,
};
/* One slot of a sampler payload description.
 *
 * The field order matters: the descriptor table in brw_sampler.c initializes
 * these positionally as { param, optional }.
 */
struct brw_sampler_payload_src {
/* What the slot holds; BRW_SAMPLER_PAYLOAD_PARAM_INVALID terminates a list. */
enum brw_sampler_payload_param param;
/* false for slots declared as required, true for those declared optional. */
bool optional;
};
/* Ordered description of the payload of one sampler opcode. */
struct brw_sampler_payload_desc {
/* Payload slots in order, terminated by the first entry whose param is
 * BRW_SAMPLER_PAYLOAD_PARAM_INVALID. The terminator occupies a slot, so at
 * most 11 real sources fit if the list is to stay terminated.
 */
struct brw_sampler_payload_src sources[12];
};
/* Returns a short printable name for a payload parameter. */
const char *
brw_sampler_payload_param_name(enum brw_sampler_payload_param param);
/* Returns the printable name of a sampler opcode. */
const char *
brw_sampler_opcode_name(enum brw_sampler_opcode opcode);
/* Returns the payload layout of a sampler opcode; the result points into a
 * static table.
 */
const struct brw_sampler_payload_desc *
brw_get_sampler_payload_desc(enum brw_sampler_opcode opcode);
/* Returns the hardware message opcode associated with a sampler opcode. */
uint32_t
brw_get_sampler_hw_opcode(enum brw_sampler_opcode opcode);
/* Selects the sampler opcode to use for a NIR texture instruction on the
 * given device.
 */
enum brw_sampler_opcode
brw_get_sampler_opcode_from_tex(const struct intel_device_info *devinfo,
const nir_tex_instr *tex);
/* Tells whether a sampler opcode is flagged as a gather operation. */
bool
brw_sampler_opcode_is_gather(enum brw_sampler_opcode opcode);
/* Returns the position of `param` within the payload of `opcode`, or -1 when
 * the payload does not contain it.
 *
 * The search stops at the first BRW_SAMPLER_PAYLOAD_PARAM_INVALID entry and
 * is additionally bounded by the size of the sources[] array, so a payload
 * filling every slot cannot make it read past the end of the array.
 */
static inline int
brw_sampler_opcode_param_index(enum brw_sampler_opcode opcode,
                               enum brw_sampler_payload_param param)
{
   const struct brw_sampler_payload_desc *desc =
      brw_get_sampler_payload_desc(opcode);
   const int num_slots =
      (int)(sizeof(desc->sources) / sizeof(desc->sources[0]));

   for (int i = 0; i < num_slots; i++) {
      if (desc->sources[i].param == BRW_SAMPLER_PAYLOAD_PARAM_INVALID)
         break;
      if (desc->sources[i].param == param)
         return i;
   }

   return -1;
}
/* Returns true when the source at `offset_index` (which must be a
 * nir_tex_src_offset source of `tex`) is constant and every component lies
 * within [-8, 7].
 */
bool
brw_nir_tex_offset_in_constant_range(const nir_tex_instr *tex,
unsigned offset_index);
#ifdef __cplusplus
}
#endif

View file

@ -95,6 +95,8 @@ libintel_compiler_brw_files = files(
'brw_reg_type.c',
'brw_reg_type.h',
'brw_rt.h',
'brw_sampler.c',
'brw_sampler.h',
'brw_schedule_instructions.cpp',
'brw_shader.cpp',
'brw_shader.h',