pan/nir: Lower texturing ops in NIR on Bifrost

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41036>
This commit is contained in:
Faith Ekstrand 2026-04-16 17:16:33 -04:00 committed by Marge Bot
parent 05a066c921
commit 6c9ffd782b
2 changed files with 3 additions and 678 deletions

View file

@ -3658,35 +3658,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
}
}
/* Translates a GLSL sampler dimensionality into the coordinate-count field of
 * the Bifrost texture descriptor. Cube maps are special-cased to 0; buffers
 * are treated as 1D. Shamelessly copied from Midgard */
static unsigned
bifrost_tex_format(enum glsl_sampler_dim dim)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_CUBE:
      return 0;

   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      return 1;

   case GLSL_SAMPLER_DIM_3D:
      return 3;

   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_MS:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      return 2;

   default:
      UNREACHABLE("Unknown sampler dim type\n");
   }
}
static enum bi_dimension
valhall_tex_dimension(enum glsl_sampler_dim dim)
{
@ -3714,555 +3685,6 @@ valhall_tex_dimension(enum glsl_sampler_dim dim)
}
}
/* Maps a NIR destination type to the hardware texture result format. Only the
 * float formats encode a conversion clamp; the integer formats ignore it. */
static enum bifrost_texture_format_full
bi_texture_format(nir_alu_type T, enum bi_clamp clamp)
{
   switch (T) {
   case nir_type_uint16:
      return BIFROST_TEXTURE_FORMAT_U16;
   case nir_type_int16:
      return BIFROST_TEXTURE_FORMAT_S16;
   case nir_type_uint32:
      return BIFROST_TEXTURE_FORMAT_U32;
   case nir_type_int32:
      return BIFROST_TEXTURE_FORMAT_S32;
   case nir_type_float16:
      return BIFROST_TEXTURE_FORMAT_F16 + clamp;
   case nir_type_float32:
      return BIFROST_TEXTURE_FORMAT_F32 + clamp;
   default:
      UNREACHABLE("Invalid type for texturing");
   }
}
/* The hardware wants the array layer as a 32-bit unsigned integer. Integer
 * NIR sources pass straight through; float sources (from NIR's .z component)
 * are converted here. */
static bi_index
bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T)
{
   nir_alu_type base = nir_alu_type_get_base_type(T);

   /* (u)int layer indices need no conversion */
   if (base == nir_type_uint || base == nir_type_int)
      return idx;

   assert(T == nir_type_float32);

   /* OpenGL ES 3.2 §8.14.2 ("Coordinate Wrapping and Texel Selection")
    * defines the layer as clamp(RNE(r), 0, dt - 1), so convert with
    * round-to-nearest-even; the clamp itself is applied at the data
    * structure level */
   bi_instr *cvt = bi_f32_to_u32_to(b, bi_temp(b->shader), idx);
   cvt->round = BI_ROUND_NONE;
   return cvt->dest[0];
}
/* TEXC's explicit and bias LOD modes requires the LOD to be transformed to a
 * 16-bit 8:8 fixed-point format. We lower as:
 *
 * F32_TO_S32(clamp(x, -16.0, +16.0) * 256.0) & 0xFFFF =
 * MKVEC(F32_TO_S32(clamp(x * 1.0/16.0, -1.0, 1.0) * (16.0 * 256.0)), #0)
 */
static bi_index
bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16)
{
   /* Precompute for constant LODs to avoid general constant folding */
   if (lod.type == BI_INDEX_CONSTANT) {
      uint32_t raw = lod.value;
      /* The constant is stored as raw float bits; decode per source width */
      float x = fp16 ? _mesa_half_to_float(raw) : uif(raw);
      int32_t s32 = CLAMP(x, -16.0f, 16.0f) * 256.0f;
      /* Mask to the low 16 bits, i.e. the 8:8 LOD field */
      return bi_imm_u32(s32 & 0xFFFF);
   }

   /* Sort of arbitrary. Must be less than 128.0, greater than or equal to
    * the max LOD (16 since we cap at 2^16 texture dimensions), and
    * preferably small to minimize precision loss */
   const float max_lod = 16.0;

   /* clamp(lod / 16, -1, 1) via an FMA with a saturating clamp mode */
   bi_instr *fsat =
      bi_fma_f32_to(b, bi_temp(b->shader), fp16 ? bi_half(lod, false) : lod,
                    bi_imm_f32(1.0f / max_lod), bi_negzero());
   fsat->clamp = BI_CLAMP_CLAMP_M1_1;

   /* Rescale to 8:8 fixed point: * (16 * 256) undoes the /16 above */
   bi_index fmul =
      bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f), bi_negzero());

   /* Keep the low 16 bits, zero the high half of the vector */
   return bi_mkvec_v2i16(b, bi_half(bi_f32_to_s32(b, fmul), false),
                         bi_imm_u16(0));
}
/* FETCH takes a 32-bit staging register containing the LOD as an integer in
 * the bottom 16-bits and (if present) the cube face index in the top 16-bits.
 * TODO: Cube face.
 */
static bi_index
bi_emit_texc_lod_cube(bi_builder *b, bi_index lod)
{
   /* Shift the integer LOD left by 8 and OR with zero to place it in the
    * field the hardware expects. NOTE(review): the shift amount of 8 (not
    * 16) looks intentional but is not obvious from the comment above —
    * presumably the LOD field is 8.8 fixed-point here; confirm against the
    * Bifrost ISA docs before changing. */
   return bi_lshift_or_i32(b, lod, bi_zero(), bi_imm_u8(8));
}
/* The hardware specifies texel offsets and multisample indices together as a
 * u8vec4 <offset, ms index>. By default all are zero, so if we have either a
 * nonzero texel offset or a nonzero multisample index, we build a u8vec4 with
 * the bits we need and return that to be passed as a staging register. Else we
 * return 0 to avoid allocating a data register when everything is zero. */
static bi_index
bi_emit_texc_offset_ms_index(bi_builder *b, nir_tex_instr *instr)
{
   bi_index dest = bi_zero();

   /* Pack up to three offset components into bytes 0..2; byte 3 is left for
    * the multisample index */
   int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset);
   if (offs_idx >= 0 && !nir_src_is_zero(instr->src[offs_idx].src)) {
      unsigned nr = nir_src_num_components(instr->src[offs_idx].src);
      bi_index idx = bi_src_index(&instr->src[offs_idx].src);
      /* Take the low byte of each present component, zero the rest */
      dest = bi_mkvec_v4i8(
         b, (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0),
         (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0),
         (nr > 2) ? bi_byte(bi_extract(b, idx, 2), 0) : bi_imm_u8(0),
         bi_imm_u8(0));
   }

   /* OR the sample index into the top byte (bit 24 upward) */
   int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index);
   if (ms_idx >= 0 && !nir_src_is_zero(instr->src[ms_idx].src)) {
      dest = bi_lshift_or_i32(b, bi_src_index(&instr->src[ms_idx].src), dest,
                              bi_imm_u8(24));
   }

   return dest;
}
/* Lowers a 3D cube coordinate to the hardware's <face, s, t> form: CUBEFACE
 * selects the face and major axis, CUBE_SSEL/CUBE_TSEL pick the in-face
 * coordinates, and a pair of clamped FMAs remap them to [0, 1]. Outputs are
 * written through the face/s/t pointers. */
static void
bi_emit_cube_coord(bi_builder *b, bi_index coord, bi_index *face, bi_index *s,
                   bi_index *t)
{
   /* Compute max { |x|, |y|, |z| } */
   bi_index maxxyz = bi_temp(b->shader);
   *face = bi_temp(b->shader);

   bi_index cx = bi_extract(b, coord, 0), cy = bi_extract(b, coord, 1),
            cz = bi_extract(b, coord, 2);

   /* Use a pseudo op on Bifrost due to tuple restrictions */
   if (b->shader->arch <= 8) {
      bi_cubeface_to(b, maxxyz, *face, cx, cy, cz);
   } else {
      /* Valhall splits the operation into two real instructions */
      bi_cubeface1_to(b, maxxyz, cx, cy, cz);
      bi_cubeface2_v9_to(b, *face, cx, cy, cz);
   }

   /* Select coordinates */
   bi_index ssel =
      bi_cube_ssel(b, bi_extract(b, coord, 2), bi_extract(b, coord, 0), *face);
   bi_index tsel =
      bi_cube_tsel(b, bi_extract(b, coord, 1), bi_extract(b, coord, 2), *face);

   /* The OpenGL ES specification requires us to transform an input vector
    * (x, y, z) to the coordinate, given the selected S/T:
    *
    * (1/2 ((s / max{x,y,z}) + 1), 1/2 ((t / max{x, y, z}) + 1))
    *
    * We implement (s shown, t similar) in a form friendlier to FMA
    * instructions, and clamp coordinates at the end for correct
    * NaN/infinity handling:
    *
    * fsat(s * (0.5 * (1 / max{x, y, z})) + 0.5)
    *
    * Take the reciprocal of max{x, y, z}
    */
   bi_index rcp = bi_frcp_f32(b, maxxyz);

   /* Calculate 0.5 * (1.0 / max{x, y, z}) */
   bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero());

   /* Transform the coordinates */
   *s = bi_temp(b->shader);
   *t = bi_temp(b->shader);

   bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f));
   bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f));

   /* Saturate to [0, 1] so NaN/infinity collapse to a valid coordinate */
   S->clamp = BI_CLAMP_CLAMP_0_1;
   T->clamp = BI_CLAMP_CLAMP_0_1;
}
/* Builds the 64-bit cube map descriptor used by TEXC. The upper 32 bits (the
 * T coordinate) are written through the passed pointer; the lower 32 bits are
 * returned. Packing the face index into the top bits of S exploits the
 * redundancy of floats within the range CUBEFACE can output:
 *
 * struct cube_map_descriptor {
 *    float s : 29;
 *    unsigned face : 3;
 *    float t : 32;
 * }
 *
 * Because CUBEFACE preshifts the face index, a single bitwise MUX.i32 with a
 * constant mask merges the low 29 bits of s with the high 3 bits of face.
 */
static bi_index
bi_emit_texc_cube_coord(bi_builder *b, bi_index coord, bi_index *t)
{
   bi_index s, face;
   bi_emit_cube_coord(b, coord, &face, &s, t);

   /* Bitwise select: mask bits come from s, the rest from face */
   return bi_mux_i32(b, s, face, bi_imm_u32(BITFIELD_MASK(29)), BI_MUX_BIT);
}
/* Map to the main texture op used. Some of these (txd in particular) will
 * lower to multiple texture ops with different opcodes (GRDESC_DER + TEX in
 * sequence). We assume that lowering is handled elsewhere.
 */
static enum bifrost_tex_op
bi_tex_op(nir_texop op)
{
   switch (op) {
   /* Filtered sampling, including biased/explicit LOD and gradients */
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
   case nir_texop_txd:
      return BIFROST_TEX_OP_TEX;

   /* Unfiltered texel fetches and gathers */
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_tg4:
      return BIFROST_TEX_OP_FETCH;

   /* LOD queries read back the gradient descriptor */
   case nir_texop_lod:
      return BIFROST_TEX_OP_GRDESC;

   case nir_texop_txs:
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
      UNREACHABLE("should've been lowered");

   default:
      UNREACHABLE("unsupported tex op");
   }
}
/* Data registers required by texturing in the order they appear. All are
 * optional, the texture operation descriptor determines which are present.
 * Note since 3D arrays are not permitted at an API level, Z_COORD and
 * ARRAY/SHADOW are exclusive, so TEXC in practice reads at most 8 registers */
enum bifrost_tex_dreg {
   BIFROST_TEX_DREG_Z_COORD = 0,   /* Third texture coordinate (3D) */
   BIFROST_TEX_DREG_Y_DELTAS = 1,  /* Gradient data */
   BIFROST_TEX_DREG_LOD = 2,       /* Explicit LOD / bias / fetch level */
   BIFROST_TEX_DREG_GRDESC_HI = 3, /* High half of gradient descriptor */
   BIFROST_TEX_DREG_SHADOW = 4,    /* Shadow comparator */
   BIFROST_TEX_DREG_ARRAY = 5,     /* Array layer index */
   BIFROST_TEX_DREG_OFFSETMS = 6,  /* Packed texel offset + sample index */
   BIFROST_TEX_DREG_SAMPLER = 7,   /* Dynamic sampler index */
   BIFROST_TEX_DREG_TEXTURE = 8,   /* Dynamic texture index */
   BIFROST_TEX_DREG_COUNT,
};
/* Emits a full TEXC (complex texturing) operation for a NIR tex instruction:
 * builds the packed texture operation descriptor, gathers all optional data
 * registers into a contiguous staging block, chooses an index mode for the
 * texture/sampler, and handles the txd (gradient) and lod-query special
 * cases. Used on Bifrost when the simple TEXS path does not apply. */
static void
bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
{
   assert((instr->op != nir_texop_txf ||
           instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) &&
          "Texel buffers should already have been lowered");

   /* Static part of the hardware texture operation descriptor */
   struct bifrost_texture_operation desc = {
      .op = bi_tex_op(instr->op),
      .offset_or_bias_disable = false, /* TODO */
      .shadow_or_clamp_disable = instr->is_shadow,
      /* LOD queries ignore the array bit even for array textures */
      .array = instr->is_array && instr->op != nir_texop_lod,
      .dimension = bifrost_tex_format(instr->sampler_dim),
      .format = bi_texture_format(instr->dest_type | instr->def.bit_size,
                                  BI_CLAMP_NONE), /* TODO */
      .mask = 0xF,
   };

   switch (desc.op) {
   case BIFROST_TEX_OP_TEX:
      /* Default to implicit LOD; may be overridden by lod/bias srcs below */
      desc.lod_or_fetch = BIFROST_LOD_MODE_COMPUTE;
      break;
   case BIFROST_TEX_OP_FETCH:
      /* For tg4, the fetch mode also encodes the gathered component */
      desc.lod_or_fetch = (enum bifrost_lod_mode)(
         instr->op == nir_texop_tg4
            ? BIFROST_TEXTURE_FETCH_GATHER4_R + instr->component
            : BIFROST_TEXTURE_FETCH_TEXEL);
      break;
   case BIFROST_TEX_OP_GRDESC:
      break;
   default:
      UNREACHABLE("texture op unsupported");
   }

   /* 32-bit indices to be allocated as consecutive staging registers */
   bi_index dregs[BIFROST_TEX_DREG_COUNT] = {};
   bi_index cx = bi_null(), cy = bi_null();
   bi_index ddx = bi_null();
   bi_index ddy = bi_null();

   /* Scatter each NIR source into coordinates, dregs, or descriptor bits */
   for (unsigned i = 0; i < instr->num_srcs; ++i) {
      bi_index index = bi_src_index(&instr->src[i].src);
      unsigned sz = nir_src_bit_size(instr->src[i].src);
      unsigned components = nir_src_num_components(instr->src[i].src);
      ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i);
      nir_alu_type T = base | sz;

      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
            /* cx/cy carry the packed cube descriptor (s+face, t) */
            cx = bi_emit_texc_cube_coord(b, index, &cy);
         } else {
            /* Copy XY (for 2D+) or XX (for 1D) */
            cx = bi_extract(b, index, 0);
            cy = bi_extract(b, index, MIN2(1, components - 1));

            assert(components >= 1 && components <= 3);

            if (components == 3 && !desc.array) {
               /* 3D */
               dregs[BIFROST_TEX_DREG_Z_COORD] = bi_extract(b, index, 2);
            }
         }

         if (desc.array) {
            /* Array layer is the last coordinate component */
            dregs[BIFROST_TEX_DREG_ARRAY] = bi_emit_texc_array_index(
               b, bi_extract(b, index, components - 1), T);
         }

         break;

      case nir_tex_src_lod:
         if (desc.op == BIFROST_TEX_OP_TEX &&
             nir_src_is_zero(instr->src[i].src)) {
            /* LOD 0 has a dedicated mode, saving a staging register */
            desc.lod_or_fetch = BIFROST_LOD_MODE_ZERO;
         } else if (desc.op == BIFROST_TEX_OP_TEX) {
            assert(base == nir_type_float);
            assert(sz == 16 || sz == 32);

            /* Explicit float LOD goes through the 8:8 fixed-point path */
            dregs[BIFROST_TEX_DREG_LOD] =
               bi_emit_texc_lod_88(b, index, sz == 16);
            desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT;
         } else {
            assert(desc.op == BIFROST_TEX_OP_FETCH);
            assert(base == nir_type_uint || base == nir_type_int);
            assert(sz == 16 || sz == 32);

            /* Integer fetch level uses the FETCH staging format */
            dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, index);
         }

         break;

      case nir_tex_src_ddx:
         /* Gradients are consumed after the loop via a GRDESC_DER op */
         ddx = index;
         break;

      case nir_tex_src_ddy:
         ddy = index;
         break;

      case nir_tex_src_bias:
         /* Upper 16-bits interpreted as a clamp, leave zero */
         assert(desc.op == BIFROST_TEX_OP_TEX);
         assert(base == nir_type_float);
         assert(sz == 16 || sz == 32);
         dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_88(b, index, sz == 16);
         desc.lod_or_fetch = BIFROST_LOD_MODE_BIAS;
         break;

      case nir_tex_src_ms_index:
      case nir_tex_src_offset:
         /* Both srcs share one packed dreg; the flag guards double emission
          * when both are present */
         if (desc.offset_or_bias_disable)
            break;

         dregs[BIFROST_TEX_DREG_OFFSETMS] =
            bi_emit_texc_offset_ms_index(b, instr);
         if (!bi_is_equiv(dregs[BIFROST_TEX_DREG_OFFSETMS], bi_zero()))
            desc.offset_or_bias_disable = true;
         break;

      case nir_tex_src_comparator:
         dregs[BIFROST_TEX_DREG_SHADOW] = index;
         break;

      case nir_tex_src_texture_offset:
         dregs[BIFROST_TEX_DREG_TEXTURE] = index;
         break;

      case nir_tex_src_sampler_offset:
         dregs[BIFROST_TEX_DREG_SAMPLER] = index;
         break;

      default:
         UNREACHABLE("Unhandled src type in texc emit");
      }
   }

   /* FETCH always needs a level; default to 0 when no lod src was given */
   if (desc.op == BIFROST_TEX_OP_FETCH &&
       bi_is_null(dregs[BIFROST_TEX_DREG_LOD])) {
      dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, bi_zero());
   }

   /* Choose an index mode */
   bool direct_tex = bi_is_null(dregs[BIFROST_TEX_DREG_TEXTURE]);
   bool direct_samp = bi_is_null(dregs[BIFROST_TEX_DREG_SAMPLER]);
   bool direct = direct_tex && direct_samp;

   desc.immediate_indices =
      direct && (instr->sampler_index < 16 && instr->texture_index < 128);

   if (desc.immediate_indices) {
      /* Both indices fit directly in the descriptor */
      desc.sampler_index_or_mode = instr->sampler_index;
      desc.index = instr->texture_index;
   } else {
      unsigned mode = 0;

      /* Progressively fall back: shared immediate, one immediate plus one
       * register, or both indices from registers */
      if (direct && instr->sampler_index == instr->texture_index &&
          instr->sampler_index < 128) {
         mode = BIFROST_INDEX_IMMEDIATE_SHARED;
         desc.index = instr->texture_index;
      } else if (direct && instr->sampler_index < 128) {
         mode = BIFROST_INDEX_IMMEDIATE_SAMPLER;
         desc.index = instr->sampler_index;
         dregs[BIFROST_TEX_DREG_TEXTURE] =
            bi_mov_i32(b, bi_imm_u32(instr->texture_index));
      } else if (direct_tex && instr->texture_index < 128) {
         mode = BIFROST_INDEX_IMMEDIATE_TEXTURE;
         desc.index = instr->texture_index;

         if (direct_samp) {
            dregs[BIFROST_TEX_DREG_SAMPLER] =
               bi_mov_i32(b, bi_imm_u32(instr->sampler_index));
         }
      } else if (direct_samp && instr->sampler_index < 128) {
         mode = BIFROST_INDEX_IMMEDIATE_SAMPLER;
         desc.index = instr->sampler_index;

         if (direct_tex) {
            dregs[BIFROST_TEX_DREG_TEXTURE] =
               bi_mov_i32(b, bi_imm_u32(instr->texture_index));
         }
      } else {
         mode = BIFROST_INDEX_REGISTER;

         if (direct_tex) {
            dregs[BIFROST_TEX_DREG_TEXTURE] =
               bi_mov_i32(b, bi_imm_u32(instr->texture_index));
         }

         if (direct_samp) {
            dregs[BIFROST_TEX_DREG_SAMPLER] =
               bi_mov_i32(b, bi_imm_u32(instr->sampler_index));
         }
      }

      mode |= (BIFROST_TEXTURE_OPERATION_SINGLE << 2);
      desc.sampler_index_or_mode = mode;
   }

   /* txd lowering: run a GRDESC_DER op first to turn the gradients into a
    * gradient descriptor, whose first word becomes our explicit LOD */
   if (!bi_is_null(ddx) || !bi_is_null(ddy)) {
      assert(!bi_is_null(ddx) && !bi_is_null(ddy));
      struct bifrost_texture_operation gropdesc = {
         .sampler_index_or_mode = desc.sampler_index_or_mode,
         .index = desc.index,
         .immediate_indices = desc.immediate_indices,
         .op = BIFROST_TEX_OP_GRDESC_DER,
         .offset_or_bias_disable = true,
         .shadow_or_clamp_disable = true,
         .array = false,
         .dimension = desc.dimension,
         .format = desc.format,
         .mask = desc.mask,
      };

      /* Array layer / cube face carry no gradient */
      unsigned coords_comp_count =
         instr->coord_components -
         (instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE);

      /* Staging payload: ddx.z (if 3D), then ddy.x/.y/.z as present;
       * ddx.x/.y ride in the coordinate operands of the TEXC below */
      bi_index derivs[4];
      unsigned sr_count = 0;

      if (coords_comp_count > 2)
         derivs[sr_count++] = bi_extract(b, ddx, 2);
      derivs[sr_count++] = bi_extract(b, ddy, 0);
      if (coords_comp_count > 1)
         derivs[sr_count++] = bi_extract(b, ddy, 1);
      if (coords_comp_count > 2)
         derivs[sr_count++] = bi_extract(b, ddy, 2);

      bi_index derivs_packed = bi_temp(b->shader);
      bi_make_vec_to(b, derivs_packed, derivs, NULL, sr_count, 32);
      bi_index grdesc = bi_temp(b->shader);
      bi_instr *I =
         bi_texc_to(b, grdesc, derivs_packed, bi_extract(b, ddx, 0),
                    coords_comp_count > 1 ? bi_extract(b, ddx, 1) : bi_zero(),
                    bi_imm_u32(gropdesc.packed), true, sr_count, 0);
      I->register_format = BI_REGISTER_FORMAT_U32;

      bi_emit_cached_split_i32(b, grdesc, 4);

      dregs[BIFROST_TEX_DREG_LOD] = bi_extract(b, grdesc, 0);
      desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT;
   }

   /* Allocate staging registers contiguously by compacting the array. */
   unsigned sr_count = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) {
      if (!bi_is_null(dregs[i]))
         dregs[sr_count++] = dregs[i];
   }

   /* 16-bit results pack two components per 32-bit register */
   unsigned res_size = instr->def.bit_size == 16 ? 2 : 4;

   bi_index sr = sr_count ? bi_temp(b->shader) : bi_null();

   if (sr_count)
      bi_emit_collect_to(b, sr, dregs, sr_count);

   if (instr->op == nir_texop_lod) {
      /* textureQueryLod: run GRDESC twice (clamped and raw) and convert the
       * 8:8 fixed-point LODs back to float */
      assert(instr->def.num_components == 2 && instr->def.bit_size == 32);

      bi_index res[2];

      for (unsigned i = 0; i < 2; i++) {
         desc.shadow_or_clamp_disable = i != 0;

         bi_index grdesc = bi_temp(b->shader);
         bi_instr *I = bi_texc_to(b, grdesc, sr, cx, cy,
                                  bi_imm_u32(desc.packed), false, sr_count, 0);
         I->register_format = BI_REGISTER_FORMAT_U32;

         bi_emit_cached_split_i32(b, grdesc, 4);

         bi_index lod = bi_s16_to_f32(b, bi_half(bi_extract(b, grdesc, 0), 0));

         lod = bi_fmul_f32(b, lod, bi_imm_f32(1.0f / 256));

         /* First component is the clamped, rounded LOD */
         if (i == 0)
            lod = bi_fround_f32(b, lod, BI_ROUND_NONE);

         res[i] = lod;
      }

      bi_make_vec_to(b, bi_def_index(&instr->def), res, NULL, 2, 32);
      return;
   }

   /* Regular texture op: emit the TEXC and split/collect the result */
   bi_index dst = bi_temp(b->shader);
   bi_instr *I =
      bi_texc_to(b, dst, sr, cx, cy, bi_imm_u32(desc.packed),
                 !nir_tex_instr_has_implicit_derivative(instr), sr_count, 0);
   I->register_format = bi_reg_fmt_for_nir(instr->dest_type);

   bi_index w[4] = {bi_null(), bi_null(), bi_null(), bi_null()};
   bi_emit_split_i32(b, w, dst, res_size);
   bi_emit_collect_to(b, bi_def_index(&instr->def), w,
                      DIV_ROUND_UP(instr->def.num_components * res_size, 4));
}
static void
bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *tex)
{
@ -4351,103 +3773,6 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *tex)
tex->def.num_components, tex->def.bit_size);
}
/* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D/cube
 * textures with sufficiently small immediate indices. Anything else
 * needs a complete texture op. */
static void
bi_emit_texs(bi_builder *b, nir_tex_instr *instr)
{
   int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord);
   assert(coord_idx >= 0);
   bi_index coord = bi_src_index(&instr->src[coord_idx].src);

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      /* Lower the cube coordinate to <face, s, t> for TEXS_CUBE */
      bi_index face, s, t;
      bi_emit_cube_coord(b, coord, &face, &s, &t);

      bi_texs_cube_to(b, instr->def.bit_size, bi_def_index(&instr->def), s, t,
                      face, instr->sampler_index, instr->texture_index);
   } else {
      /* 2D path; anything that isn't nir_texop_tex forces LOD 0 */
      bi_texs_2d_to(b, instr->def.bit_size, bi_def_index(&instr->def),
                    bi_extract(b, coord, 0), bi_extract(b, coord, 1),
                    instr->op != nir_texop_tex, /* zero LOD */
                    instr->sampler_index, instr->texture_index);
   }

   bi_split_def(b, &instr->def);
}
/* Decides whether a NIR tex instruction can use the short TEXS encoding:
 * float tex/txl with LOD 0 on a non-shadow, non-array 2D/cube texture whose
 * sampler and texture indices fit the descriptor's immediate fields. */
static bool
bi_is_simple_tex(nir_tex_instr *instr)
{
   if (instr->op != nir_texop_tex && instr->op != nir_texop_txl)
      return false;

   if (instr->dest_type != nir_type_float32 &&
       instr->dest_type != nir_type_float16)
      return false;

   if (instr->is_shadow || instr->is_array)
      return false;

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_CUBE:
      /* LOD can't be specified with TEXS_CUBE */
      if (instr->op == nir_texop_txl)
         return false;
      break;

   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_RECT:
      break;

   default:
      return false;
   }

   /* Only coordinate and LOD sources are representable */
   for (unsigned i = 0; i < instr->num_srcs; ++i) {
      nir_tex_src_type src = instr->src[i].src_type;

      if (src != nir_tex_src_coord && src != nir_tex_src_lod)
         return false;
   }

   /* Indices need to fit in provided bits */
   unsigned idx_bits = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE ? 2 : 3;
   if (MAX2(instr->sampler_index, instr->texture_index) >= (1 << idx_bits))
      return false;

   /* Any present LOD must be the constant zero */
   int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod);
   return lod_idx < 0 || nir_src_is_zero(instr->src[lod_idx].src);
}
/* Top-level texture emission: dispatches to the Valhall path, the short TEXS
 * encoding, or the full TEXC encoding. */
static void
bi_emit_tex(bi_builder *b, nir_tex_instr *instr)
{
   /* If txf is used, we assume there is a valid sampler bound at index 0. Use
    * it for txf operations, since there may be no other valid samplers. This is
    * a workaround: txf does not require a sampler in NIR (so sampler_index is
    * undefined) but we need one in the hardware. This is ABI with the driver.
    *
    * On Valhall, as the descriptor table is encoded in the index, this should
    * be handled by the driver.
    */
   if (b->shader->arch < 9 && !nir_tex_instr_need_sampler(instr))
      instr->sampler_index = 0;

   if (b->shader->arch >= 9) {
      bi_emit_tex_valhall(b, instr);
   } else if (bi_is_simple_tex(instr)) {
      bi_emit_texs(b, instr);
   } else {
      bi_emit_texc(b, instr);
   }
}
static void
bi_emit_phi(bi_builder *b, nir_phi_instr *instr)
{
@ -4514,7 +3839,8 @@ bi_emit_instr(bi_builder *b, struct nir_instr *instr)
break;
case nir_instr_type_tex:
bi_emit_tex(b, nir_instr_as_tex(instr));
assert(b->shader->arch >= 9);
bi_emit_tex_valhall(b, nir_instr_as_tex(instr));
break;
case nir_instr_type_jump:

View file

@ -943,8 +943,7 @@ bifrost_postprocess_nir(nir_shader *nir,
&info->vs.needs_extended_fifo);
}
if (pan_arch(gpu_id) >= 9)
NIR_PASS(_, nir, pan_nir_lower_tex, gpu_id);
NIR_PASS(_, nir, pan_nir_lower_tex, gpu_id);
/* Our OpenCL compiler (src/panfrost/clc/pan_compile.c) has a very weird and
* suboptimal optimization pipeline that results in a lot of unoptimized