mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 11:28:05 +02:00
pan/nir: Lower texturing ops in NIR on Bifrost
Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com> Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41036>
This commit is contained in:
parent
05a066c921
commit
6c9ffd782b
2 changed files with 3 additions and 678 deletions
|
|
@ -3658,35 +3658,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
|||
}
|
||||
}
|
||||
|
||||
/* Returns dimension with 0 special casing cubemaps. Shamelessly copied from
|
||||
* Midgard */
|
||||
static unsigned
|
||||
bifrost_tex_format(enum glsl_sampler_dim dim)
|
||||
{
|
||||
switch (dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
case GLSL_SAMPLER_DIM_BUF:
|
||||
return 1;
|
||||
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_MS:
|
||||
case GLSL_SAMPLER_DIM_EXTERNAL:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
case GLSL_SAMPLER_DIM_SUBPASS:
|
||||
case GLSL_SAMPLER_DIM_SUBPASS_MS:
|
||||
return 2;
|
||||
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
return 3;
|
||||
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
return 0;
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unknown sampler dim type\n");
|
||||
}
|
||||
}
|
||||
|
||||
static enum bi_dimension
|
||||
valhall_tex_dimension(enum glsl_sampler_dim dim)
|
||||
{
|
||||
|
|
@ -3714,555 +3685,6 @@ valhall_tex_dimension(enum glsl_sampler_dim dim)
|
|||
}
|
||||
}
|
||||
|
||||
static enum bifrost_texture_format_full
|
||||
bi_texture_format(nir_alu_type T, enum bi_clamp clamp)
|
||||
{
|
||||
switch (T) {
|
||||
case nir_type_float16:
|
||||
return BIFROST_TEXTURE_FORMAT_F16 + clamp;
|
||||
case nir_type_float32:
|
||||
return BIFROST_TEXTURE_FORMAT_F32 + clamp;
|
||||
case nir_type_uint16:
|
||||
return BIFROST_TEXTURE_FORMAT_U16;
|
||||
case nir_type_int16:
|
||||
return BIFROST_TEXTURE_FORMAT_S16;
|
||||
case nir_type_uint32:
|
||||
return BIFROST_TEXTURE_FORMAT_U32;
|
||||
case nir_type_int32:
|
||||
return BIFROST_TEXTURE_FORMAT_S32;
|
||||
default:
|
||||
UNREACHABLE("Invalid type for texturing");
|
||||
}
|
||||
}
|
||||
|
||||
/* Array indices are specified as 32-bit uints, need to convert. In .z component
|
||||
* from NIR */
|
||||
static bi_index
|
||||
bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T)
|
||||
{
|
||||
/* For (u)int we can just passthrough */
|
||||
nir_alu_type base = nir_alu_type_get_base_type(T);
|
||||
if (base == nir_type_int || base == nir_type_uint)
|
||||
return idx;
|
||||
|
||||
/* Otherwise we convert */
|
||||
assert(T == nir_type_float32);
|
||||
|
||||
/* OpenGL ES 3.2 specification section 8.14.2 ("Coordinate Wrapping and
|
||||
* Texel Selection") defines the layer to be taken from clamp(RNE(r),
|
||||
* 0, dt - 1). So we use round RTE, clamping is handled at the data
|
||||
* structure level */
|
||||
|
||||
bi_instr *I = bi_f32_to_u32_to(b, bi_temp(b->shader), idx);
|
||||
I->round = BI_ROUND_NONE;
|
||||
return I->dest[0];
|
||||
}
|
||||
|
||||
/* TEXC's explicit and bias LOD modes requires the LOD to be transformed to a
|
||||
* 16-bit 8:8 fixed-point format. We lower as:
|
||||
*
|
||||
* F32_TO_S32(clamp(x, -16.0, +16.0) * 256.0) & 0xFFFF =
|
||||
* MKVEC(F32_TO_S32(clamp(x * 1.0/16.0, -1.0, 1.0) * (16.0 * 256.0)), #0)
|
||||
*/
|
||||
|
||||
static bi_index
|
||||
bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16)
|
||||
{
|
||||
/* Precompute for constant LODs to avoid general constant folding */
|
||||
if (lod.type == BI_INDEX_CONSTANT) {
|
||||
uint32_t raw = lod.value;
|
||||
float x = fp16 ? _mesa_half_to_float(raw) : uif(raw);
|
||||
int32_t s32 = CLAMP(x, -16.0f, 16.0f) * 256.0f;
|
||||
return bi_imm_u32(s32 & 0xFFFF);
|
||||
}
|
||||
|
||||
/* Sort of arbitrary. Must be less than 128.0, greater than or equal to
|
||||
* the max LOD (16 since we cap at 2^16 texture dimensions), and
|
||||
* preferably small to minimize precision loss */
|
||||
const float max_lod = 16.0;
|
||||
|
||||
bi_instr *fsat =
|
||||
bi_fma_f32_to(b, bi_temp(b->shader), fp16 ? bi_half(lod, false) : lod,
|
||||
bi_imm_f32(1.0f / max_lod), bi_negzero());
|
||||
|
||||
fsat->clamp = BI_CLAMP_CLAMP_M1_1;
|
||||
|
||||
bi_index fmul =
|
||||
bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f), bi_negzero());
|
||||
|
||||
return bi_mkvec_v2i16(b, bi_half(bi_f32_to_s32(b, fmul), false),
|
||||
bi_imm_u16(0));
|
||||
}
|
||||
|
||||
/* FETCH takes a 32-bit staging register containing the LOD as an integer in
|
||||
* the bottom 16-bits and (if present) the cube face index in the top 16-bits.
|
||||
* TODO: Cube face.
|
||||
*/
|
||||
|
||||
static bi_index
|
||||
bi_emit_texc_lod_cube(bi_builder *b, bi_index lod)
|
||||
{
|
||||
return bi_lshift_or_i32(b, lod, bi_zero(), bi_imm_u8(8));
|
||||
}
|
||||
|
||||
/* The hardware specifies texel offsets and multisample indices together as a
|
||||
* u8vec4 <offset, ms index>. By default all are zero, so if have either a
|
||||
* nonzero texel offset or a nonzero multisample index, we build a u8vec4 with
|
||||
* the bits we need and return that to be passed as a staging register. Else we
|
||||
* return 0 to avoid allocating a data register when everything is zero. */
|
||||
|
||||
static bi_index
|
||||
bi_emit_texc_offset_ms_index(bi_builder *b, nir_tex_instr *instr)
|
||||
{
|
||||
bi_index dest = bi_zero();
|
||||
|
||||
int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset);
|
||||
if (offs_idx >= 0 && !nir_src_is_zero(instr->src[offs_idx].src)) {
|
||||
unsigned nr = nir_src_num_components(instr->src[offs_idx].src);
|
||||
bi_index idx = bi_src_index(&instr->src[offs_idx].src);
|
||||
dest = bi_mkvec_v4i8(
|
||||
b, (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0),
|
||||
(nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0),
|
||||
(nr > 2) ? bi_byte(bi_extract(b, idx, 2), 0) : bi_imm_u8(0),
|
||||
bi_imm_u8(0));
|
||||
}
|
||||
|
||||
int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index);
|
||||
if (ms_idx >= 0 && !nir_src_is_zero(instr->src[ms_idx].src)) {
|
||||
dest = bi_lshift_or_i32(b, bi_src_index(&instr->src[ms_idx].src), dest,
|
||||
bi_imm_u8(24));
|
||||
}
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
static void
|
||||
bi_emit_cube_coord(bi_builder *b, bi_index coord, bi_index *face, bi_index *s,
|
||||
bi_index *t)
|
||||
{
|
||||
/* Compute max { |x|, |y|, |z| } */
|
||||
bi_index maxxyz = bi_temp(b->shader);
|
||||
*face = bi_temp(b->shader);
|
||||
|
||||
bi_index cx = bi_extract(b, coord, 0), cy = bi_extract(b, coord, 1),
|
||||
cz = bi_extract(b, coord, 2);
|
||||
|
||||
/* Use a pseudo op on Bifrost due to tuple restrictions */
|
||||
if (b->shader->arch <= 8) {
|
||||
bi_cubeface_to(b, maxxyz, *face, cx, cy, cz);
|
||||
} else {
|
||||
bi_cubeface1_to(b, maxxyz, cx, cy, cz);
|
||||
bi_cubeface2_v9_to(b, *face, cx, cy, cz);
|
||||
}
|
||||
|
||||
/* Select coordinates */
|
||||
bi_index ssel =
|
||||
bi_cube_ssel(b, bi_extract(b, coord, 2), bi_extract(b, coord, 0), *face);
|
||||
bi_index tsel =
|
||||
bi_cube_tsel(b, bi_extract(b, coord, 1), bi_extract(b, coord, 2), *face);
|
||||
|
||||
/* The OpenGL ES specification requires us to transform an input vector
|
||||
* (x, y, z) to the coordinate, given the selected S/T:
|
||||
*
|
||||
* (1/2 ((s / max{x,y,z}) + 1), 1/2 ((t / max{x, y, z}) + 1))
|
||||
*
|
||||
* We implement (s shown, t similar) in a form friendlier to FMA
|
||||
* instructions, and clamp coordinates at the end for correct
|
||||
* NaN/infinity handling:
|
||||
*
|
||||
* fsat(s * (0.5 * (1 / max{x, y, z})) + 0.5)
|
||||
*
|
||||
* Take the reciprocal of max{x, y, z}
|
||||
*/
|
||||
bi_index rcp = bi_frcp_f32(b, maxxyz);
|
||||
|
||||
/* Calculate 0.5 * (1.0 / max{x, y, z}) */
|
||||
bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero());
|
||||
|
||||
/* Transform the coordinates */
|
||||
*s = bi_temp(b->shader);
|
||||
*t = bi_temp(b->shader);
|
||||
|
||||
bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f));
|
||||
bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f));
|
||||
|
||||
S->clamp = BI_CLAMP_CLAMP_0_1;
|
||||
T->clamp = BI_CLAMP_CLAMP_0_1;
|
||||
}
|
||||
|
||||
/* Emits a cube map descriptor, returning lower 32-bits and putting upper
|
||||
* 32-bits in passed pointer t. The packing of the face with the S coordinate
|
||||
* exploits the redundancy of floating points with the range restriction of
|
||||
* CUBEFACE output.
|
||||
*
|
||||
* struct cube_map_descriptor {
|
||||
* float s : 29;
|
||||
* unsigned face : 3;
|
||||
* float t : 32;
|
||||
* }
|
||||
*
|
||||
* Since the cube face index is preshifted, this is easy to pack with a bitwise
|
||||
* MUX.i32 and a fixed mask, selecting the lower bits 29 from s and the upper 3
|
||||
* bits from face.
|
||||
*/
|
||||
|
||||
static bi_index
|
||||
bi_emit_texc_cube_coord(bi_builder *b, bi_index coord, bi_index *t)
|
||||
{
|
||||
bi_index face, s;
|
||||
bi_emit_cube_coord(b, coord, &face, &s, t);
|
||||
bi_index mask = bi_imm_u32(BITFIELD_MASK(29));
|
||||
return bi_mux_i32(b, s, face, mask, BI_MUX_BIT);
|
||||
}
|
||||
|
||||
/* Map to the main texture op used. Some of these (txd in particular) will
|
||||
* lower to multiple texture ops with different opcodes (GRDESC_DER + TEX in
|
||||
* sequence). We assume that lowering is handled elsewhere.
|
||||
*/
|
||||
|
||||
static enum bifrost_tex_op
|
||||
bi_tex_op(nir_texop op)
|
||||
{
|
||||
switch (op) {
|
||||
case nir_texop_tex:
|
||||
case nir_texop_txb:
|
||||
case nir_texop_txl:
|
||||
case nir_texop_txd:
|
||||
return BIFROST_TEX_OP_TEX;
|
||||
case nir_texop_txf:
|
||||
case nir_texop_txf_ms:
|
||||
case nir_texop_tg4:
|
||||
return BIFROST_TEX_OP_FETCH;
|
||||
case nir_texop_lod:
|
||||
return BIFROST_TEX_OP_GRDESC;
|
||||
case nir_texop_txs:
|
||||
case nir_texop_query_levels:
|
||||
case nir_texop_texture_samples:
|
||||
case nir_texop_samples_identical:
|
||||
UNREACHABLE("should've been lowered");
|
||||
default:
|
||||
UNREACHABLE("unsupported tex op");
|
||||
}
|
||||
}
|
||||
|
||||
/* Data registers required by texturing in the order they appear. All are
 * optional, the texture operation descriptor determines which are present.
 * Note since 3D arrays are not permitted at an API level, Z_COORD and
 * ARRAY/SHADOW are exclusive, so TEXC in practice reads at most 8 registers.
 *
 * The enumerators number the slots consecutively from 0; the numeric value of
 * each slot is repeated in its comment. */
enum bifrost_tex_dreg {
   BIFROST_TEX_DREG_Z_COORD = 0, /* 0: third coordinate of non-array lookups */
   BIFROST_TEX_DREG_Y_DELTAS,    /* 1 */
   BIFROST_TEX_DREG_LOD,         /* 2: LOD, bias or gradient descriptor word */
   BIFROST_TEX_DREG_GRDESC_HI,   /* 3 */
   BIFROST_TEX_DREG_SHADOW,      /* 4: shadow comparator */
   BIFROST_TEX_DREG_ARRAY,       /* 5: array index */
   BIFROST_TEX_DREG_OFFSETMS,    /* 6: packed texel offset and sample index */
   BIFROST_TEX_DREG_SAMPLER,     /* 7: sampler index when not immediate */
   BIFROST_TEX_DREG_TEXTURE,     /* 8: texture index when not immediate */
   BIFROST_TEX_DREG_COUNT,       /* 9: number of slots, not a register */
};
|
||||
|
||||
/* Emits a complete (TEXC) texture operation for a NIR texture instruction.
 *
 * In order, this function:
 *  - fills in a bifrost_texture_operation descriptor from the instruction,
 *  - walks the NIR sources, collecting the coordinates in cx/cy and the
 *    optional operands in the dregs array (indexed by bifrost_tex_dreg),
 *  - chooses how the texture and sampler indices are passed,
 *  - for explicit derivatives, emits an extra GRDESC_DER operation whose
 *    result feeds the LOD data register,
 *  - compacts dregs into a single staging vector and emits the TEXC.
 *
 * nir_texop_lod is handled separately near the end with two GRDESC
 * operations. Texel buffer fetches must have been lowered before this is
 * called (asserted below). */
static void
|
||||
bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
|
||||
{
|
||||
assert((instr->op != nir_texop_txf ||
|
||||
instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) &&
|
||||
"Texel buffers should already have been lowered");
|
||||
|
||||
/* Descriptor fields known up front; the LOD mode, offset flag and index
 * fields are filled in as the sources are processed below. Array-ness is
 * dropped for LOD queries. */
struct bifrost_texture_operation desc = {
|
||||
.op = bi_tex_op(instr->op),
|
||||
.offset_or_bias_disable = false, /* TODO */
|
||||
.shadow_or_clamp_disable = instr->is_shadow,
|
||||
.array = instr->is_array && instr->op != nir_texop_lod,
|
||||
.dimension = bifrost_tex_format(instr->sampler_dim),
|
||||
.format = bi_texture_format(instr->dest_type | instr->def.bit_size,
|
||||
BI_CLAMP_NONE), /* TODO */
|
||||
.mask = 0xF,
|
||||
};
|
||||
|
||||
/* Default LOD / fetch mode for the op; LOD and bias sources may override
 * it in the source loop. */
switch (desc.op) {
|
||||
case BIFROST_TEX_OP_TEX:
|
||||
desc.lod_or_fetch = BIFROST_LOD_MODE_COMPUTE;
|
||||
break;
|
||||
case BIFROST_TEX_OP_FETCH:
|
||||
/* For gathers the gathered component is added to the GATHER4_R mode */
desc.lod_or_fetch = (enum bifrost_lod_mode)(
|
||||
instr->op == nir_texop_tg4
|
||||
? BIFROST_TEXTURE_FETCH_GATHER4_R + instr->component
|
||||
: BIFROST_TEXTURE_FETCH_TEXEL);
|
||||
break;
|
||||
case BIFROST_TEX_OP_GRDESC:
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("texture op unsupported");
|
||||
}
|
||||
|
||||
/* 32-bit indices to be allocated as consecutive staging registers */
|
||||
bi_index dregs[BIFROST_TEX_DREG_COUNT] = {};
|
||||
bi_index cx = bi_null(), cy = bi_null();
|
||||
bi_index ddx = bi_null();
|
||||
bi_index ddy = bi_null();
|
||||
|
||||
/* Visit every NIR source: each one either provides the coordinates, fills
 * a data register slot, or is remembered (ddx/ddy) for later. */
for (unsigned i = 0; i < instr->num_srcs; ++i) {
|
||||
bi_index index = bi_src_index(&instr->src[i].src);
|
||||
unsigned sz = nir_src_bit_size(instr->src[i].src);
|
||||
unsigned components = nir_src_num_components(instr->src[i].src);
|
||||
ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i);
|
||||
nir_alu_type T = base | sz;
|
||||
|
||||
switch (instr->src[i].src_type) {
|
||||
case nir_tex_src_coord:
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
|
||||
/* Cube maps: cx gets the packed face/S word, cy gets T */
cx = bi_emit_texc_cube_coord(b, index, &cy);
|
||||
} else {
|
||||
/* Copy XY (for 2D+) or XX (for 1D) */
|
||||
cx = bi_extract(b, index, 0);
|
||||
cy = bi_extract(b, index, MIN2(1, components - 1));
|
||||
|
||||
assert(components >= 1 && components <= 3);
|
||||
|
||||
if (components == 3 && !desc.array) {
|
||||
/* 3D */
|
||||
dregs[BIFROST_TEX_DREG_Z_COORD] = bi_extract(b, index, 2);
|
||||
}
|
||||
}
|
||||
|
||||
/* The array index is the last coordinate component */
if (desc.array) {
|
||||
dregs[BIFROST_TEX_DREG_ARRAY] = bi_emit_texc_array_index(
|
||||
b, bi_extract(b, index, components - 1), T);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case nir_tex_src_lod:
|
||||
/* Three cases: sampling with a zero LOD (no register needed), sampling
 * with an explicit float LOD (8:8 fixed point), or a fetch with an
 * integer LOD. */
if (desc.op == BIFROST_TEX_OP_TEX &&
|
||||
nir_src_is_zero(instr->src[i].src)) {
|
||||
desc.lod_or_fetch = BIFROST_LOD_MODE_ZERO;
|
||||
} else if (desc.op == BIFROST_TEX_OP_TEX) {
|
||||
assert(base == nir_type_float);
|
||||
|
||||
assert(sz == 16 || sz == 32);
|
||||
dregs[BIFROST_TEX_DREG_LOD] =
|
||||
bi_emit_texc_lod_88(b, index, sz == 16);
|
||||
desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT;
|
||||
} else {
|
||||
assert(desc.op == BIFROST_TEX_OP_FETCH);
|
||||
assert(base == nir_type_uint || base == nir_type_int);
|
||||
assert(sz == 16 || sz == 32);
|
||||
|
||||
dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, index);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
/* Derivatives are only recorded here; they are consumed after the index
 * mode has been chosen. */
case nir_tex_src_ddx:
|
||||
ddx = index;
|
||||
break;
|
||||
|
||||
case nir_tex_src_ddy:
|
||||
ddy = index;
|
||||
break;
|
||||
|
||||
case nir_tex_src_bias:
|
||||
/* Upper 16-bits interpreted as a clamp, leave zero */
|
||||
assert(desc.op == BIFROST_TEX_OP_TEX);
|
||||
assert(base == nir_type_float);
|
||||
assert(sz == 16 || sz == 32);
|
||||
dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_88(b, index, sz == 16);
|
||||
desc.lod_or_fetch = BIFROST_LOD_MODE_BIAS;
|
||||
break;
|
||||
|
||||
/* Offset and sample index share one register, built in a single call that
 * looks at both sources. Once the flag is set by the first of the two
 * sources, the second one is skipped. */
case nir_tex_src_ms_index:
|
||||
case nir_tex_src_offset:
|
||||
if (desc.offset_or_bias_disable)
|
||||
break;
|
||||
|
||||
dregs[BIFROST_TEX_DREG_OFFSETMS] =
|
||||
bi_emit_texc_offset_ms_index(b, instr);
|
||||
if (!bi_is_equiv(dregs[BIFROST_TEX_DREG_OFFSETMS], bi_zero()))
|
||||
desc.offset_or_bias_disable = true;
|
||||
break;
|
||||
|
||||
case nir_tex_src_comparator:
|
||||
dregs[BIFROST_TEX_DREG_SHADOW] = index;
|
||||
break;
|
||||
|
||||
case nir_tex_src_texture_offset:
|
||||
dregs[BIFROST_TEX_DREG_TEXTURE] = index;
|
||||
break;
|
||||
|
||||
case nir_tex_src_sampler_offset:
|
||||
dregs[BIFROST_TEX_DREG_SAMPLER] = index;
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unhandled src type in texc emit");
|
||||
}
|
||||
}
|
||||
|
||||
/* A fetch without any LOD source still gets a LOD register, built from a
 * zero LOD. */
if (desc.op == BIFROST_TEX_OP_FETCH &&
|
||||
bi_is_null(dregs[BIFROST_TEX_DREG_LOD])) {
|
||||
dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, bi_zero());
|
||||
}
|
||||
|
||||
/* Choose an index mode */
|
||||
|
||||
/* "Direct" means no texture/sampler offset source was seen, so the index
 * comes from the instruction's constant texture_index/sampler_index. */
bool direct_tex = bi_is_null(dregs[BIFROST_TEX_DREG_TEXTURE]);
|
||||
bool direct_samp = bi_is_null(dregs[BIFROST_TEX_DREG_SAMPLER]);
|
||||
bool direct = direct_tex && direct_samp;
|
||||
|
||||
/* Both indices can be immediates only if the sampler index is below 16
 * and the texture index below 128. */
desc.immediate_indices =
|
||||
direct && (instr->sampler_index < 16 && instr->texture_index < 128);
|
||||
|
||||
if (desc.immediate_indices) {
|
||||
desc.sampler_index_or_mode = instr->sampler_index;
|
||||
desc.index = instr->texture_index;
|
||||
} else {
|
||||
unsigned mode = 0;
|
||||
|
||||
/* Otherwise at most one index (below 128) is kept as an immediate in
 * desc.index; whatever is left is moved into its data register. The
 * branches are tried in order of preference. */
if (direct && instr->sampler_index == instr->texture_index &&
|
||||
instr->sampler_index < 128) {
|
||||
mode = BIFROST_INDEX_IMMEDIATE_SHARED;
|
||||
desc.index = instr->texture_index;
|
||||
} else if (direct && instr->sampler_index < 128) {
|
||||
mode = BIFROST_INDEX_IMMEDIATE_SAMPLER;
|
||||
desc.index = instr->sampler_index;
|
||||
dregs[BIFROST_TEX_DREG_TEXTURE] =
|
||||
bi_mov_i32(b, bi_imm_u32(instr->texture_index));
|
||||
} else if (direct_tex && instr->texture_index < 128) {
|
||||
mode = BIFROST_INDEX_IMMEDIATE_TEXTURE;
|
||||
desc.index = instr->texture_index;
|
||||
|
||||
if (direct_samp) {
|
||||
dregs[BIFROST_TEX_DREG_SAMPLER] =
|
||||
bi_mov_i32(b, bi_imm_u32(instr->sampler_index));
|
||||
}
|
||||
} else if (direct_samp && instr->sampler_index < 128) {
|
||||
mode = BIFROST_INDEX_IMMEDIATE_SAMPLER;
|
||||
desc.index = instr->sampler_index;
|
||||
|
||||
if (direct_tex) {
|
||||
dregs[BIFROST_TEX_DREG_TEXTURE] =
|
||||
bi_mov_i32(b, bi_imm_u32(instr->texture_index));
|
||||
}
|
||||
} else {
|
||||
/* Neither index fits as an immediate: both go through registers */
mode = BIFROST_INDEX_REGISTER;
|
||||
|
||||
if (direct_tex) {
|
||||
dregs[BIFROST_TEX_DREG_TEXTURE] =
|
||||
bi_mov_i32(b, bi_imm_u32(instr->texture_index));
|
||||
}
|
||||
|
||||
if (direct_samp) {
|
||||
dregs[BIFROST_TEX_DREG_SAMPLER] =
|
||||
bi_mov_i32(b, bi_imm_u32(instr->sampler_index));
|
||||
}
|
||||
}
|
||||
|
||||
mode |= (BIFROST_TEXTURE_OPERATION_SINGLE << 2);
|
||||
desc.sampler_index_or_mode = mode;
|
||||
}
|
||||
|
||||
/* Explicit derivatives: emit a separate GRDESC_DER operation first, reusing
 * the index, dimension, format and mask fields chosen above. The first word
 * of its result is then used as the LOD register of the main operation, in
 * explicit LOD mode. */
if (!bi_is_null(ddx) || !bi_is_null(ddy)) {
|
||||
assert(!bi_is_null(ddx) && !bi_is_null(ddy));
|
||||
struct bifrost_texture_operation gropdesc = {
|
||||
.sampler_index_or_mode = desc.sampler_index_or_mode,
|
||||
.index = desc.index,
|
||||
.immediate_indices = desc.immediate_indices,
|
||||
.op = BIFROST_TEX_OP_GRDESC_DER,
|
||||
.offset_or_bias_disable = true,
|
||||
.shadow_or_clamp_disable = true,
|
||||
.array = false,
|
||||
.dimension = desc.dimension,
|
||||
.format = desc.format,
|
||||
.mask = desc.mask,
|
||||
};
|
||||
|
||||
/* Coordinate components that carry a derivative: the array layer and the
 * extra cube component are not counted */
unsigned coords_comp_count =
|
||||
instr->coord_components -
|
||||
(instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
|
||||
bi_index derivs[4];
|
||||
unsigned sr_count = 0;
|
||||
|
||||
/* ddx.x and ddx.y are passed as the coordinate operands of the TEXC below;
 * the staging vector holds ddx.z (if any) followed by the ddy components */
if (coords_comp_count > 2)
|
||||
derivs[sr_count++] = bi_extract(b, ddx, 2);
|
||||
derivs[sr_count++] = bi_extract(b, ddy, 0);
|
||||
if (coords_comp_count > 1)
|
||||
derivs[sr_count++] = bi_extract(b, ddy, 1);
|
||||
if (coords_comp_count > 2)
|
||||
derivs[sr_count++] = bi_extract(b, ddy, 2);
|
||||
|
||||
bi_index derivs_packed = bi_temp(b->shader);
|
||||
bi_make_vec_to(b, derivs_packed, derivs, NULL, sr_count, 32);
|
||||
bi_index grdesc = bi_temp(b->shader);
|
||||
bi_instr *I =
|
||||
bi_texc_to(b, grdesc, derivs_packed, bi_extract(b, ddx, 0),
|
||||
coords_comp_count > 1 ? bi_extract(b, ddx, 1) : bi_zero(),
|
||||
bi_imm_u32(gropdesc.packed), true, sr_count, 0);
|
||||
I->register_format = BI_REGISTER_FORMAT_U32;
|
||||
|
||||
/* Split the result so that individual words can be extracted */
bi_emit_cached_split_i32(b, grdesc, 4);
|
||||
|
||||
dregs[BIFROST_TEX_DREG_LOD] = bi_extract(b, grdesc, 0);
|
||||
desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT;
|
||||
}
|
||||
|
||||
/* Allocate staging registers contiguously by compacting the array. */
|
||||
unsigned sr_count = 0;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) {
|
||||
if (!bi_is_null(dregs[i]))
|
||||
dregs[sr_count++] = dregs[i];
|
||||
}
|
||||
|
||||
/* Bytes per result component: 2 for 16-bit results, 4 otherwise */
unsigned res_size = instr->def.bit_size == 16 ? 2 : 4;
|
||||
|
||||
/* The staging vector only exists if at least one data register is used */
bi_index sr = sr_count ? bi_temp(b->shader) : bi_null();
|
||||
|
||||
if (sr_count)
|
||||
bi_emit_collect_to(b, sr, dregs, sr_count);
|
||||
|
||||
/* LOD query: two GRDESC operations that differ only in
 * shadow_or_clamp_disable (false for the first, true for the second). For
 * each, the low 16 bits of the first result word are converted from a
 * signed integer to float and scaled by 1/256; the first result is
 * additionally rounded. The two values form the vec2 result. */
if (instr->op == nir_texop_lod) {
|
||||
assert(instr->def.num_components == 2 && instr->def.bit_size == 32);
|
||||
|
||||
bi_index res[2];
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
desc.shadow_or_clamp_disable = i != 0;
|
||||
|
||||
bi_index grdesc = bi_temp(b->shader);
|
||||
bi_instr *I = bi_texc_to(b, grdesc, sr, cx, cy,
|
||||
bi_imm_u32(desc.packed), false, sr_count, 0);
|
||||
I->register_format = BI_REGISTER_FORMAT_U32;
|
||||
|
||||
bi_emit_cached_split_i32(b, grdesc, 4);
|
||||
|
||||
bi_index lod = bi_s16_to_f32(b, bi_half(bi_extract(b, grdesc, 0), 0));
|
||||
|
||||
lod = bi_fmul_f32(b, lod, bi_imm_f32(1.0f / 256));
|
||||
|
||||
if (i == 0)
|
||||
lod = bi_fround_f32(b, lod, BI_ROUND_NONE);
|
||||
|
||||
res[i] = lod;
|
||||
}
|
||||
|
||||
bi_make_vec_to(b, bi_def_index(&instr->def), res, NULL, 2, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Regular path: one TEXC writing a temporary. The boolean operand is true
 * when the NIR instruction has no implicit derivative. */
bi_index dst = bi_temp(b->shader);
|
||||
|
||||
bi_instr *I =
|
||||
bi_texc_to(b, dst, sr, cx, cy, bi_imm_u32(desc.packed),
|
||||
!nir_tex_instr_has_implicit_derivative(instr), sr_count, 0);
|
||||
I->register_format = bi_reg_fmt_for_nir(instr->dest_type);
|
||||
|
||||
/* Split the result into 32-bit words and collect as many of them as the
 * NIR destination needs (its byte size rounded up to whole words) */
bi_index w[4] = {bi_null(), bi_null(), bi_null(), bi_null()};
|
||||
bi_emit_split_i32(b, w, dst, res_size);
|
||||
bi_emit_collect_to(b, bi_def_index(&instr->def), w,
|
||||
DIV_ROUND_UP(instr->def.num_components * res_size, 4));
|
||||
}
|
||||
|
||||
static void
|
||||
bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *tex)
|
||||
{
|
||||
|
|
@ -4351,103 +3773,6 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *tex)
|
|||
tex->def.num_components, tex->def.bit_size);
|
||||
}
|
||||
|
||||
/* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D/cube
|
||||
* textures with sufficiently small immediate indices. Anything else
|
||||
* needs a complete texture op. */
|
||||
|
||||
static void
|
||||
bi_emit_texs(bi_builder *b, nir_tex_instr *instr)
|
||||
{
|
||||
int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord);
|
||||
assert(coord_idx >= 0);
|
||||
bi_index coords = bi_src_index(&instr->src[coord_idx].src);
|
||||
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
|
||||
bi_index face, s, t;
|
||||
bi_emit_cube_coord(b, coords, &face, &s, &t);
|
||||
|
||||
bi_texs_cube_to(b, instr->def.bit_size, bi_def_index(&instr->def), s, t,
|
||||
face, instr->sampler_index, instr->texture_index);
|
||||
} else {
|
||||
bi_texs_2d_to(b, instr->def.bit_size, bi_def_index(&instr->def),
|
||||
bi_extract(b, coords, 0), bi_extract(b, coords, 1),
|
||||
instr->op != nir_texop_tex, /* zero LOD */
|
||||
instr->sampler_index, instr->texture_index);
|
||||
}
|
||||
|
||||
bi_split_def(b, &instr->def);
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_is_simple_tex(nir_tex_instr *instr)
|
||||
{
|
||||
if (instr->op != nir_texop_tex && instr->op != nir_texop_txl)
|
||||
return false;
|
||||
|
||||
if (instr->dest_type != nir_type_float32 &&
|
||||
instr->dest_type != nir_type_float16)
|
||||
return false;
|
||||
|
||||
if (instr->is_shadow || instr->is_array)
|
||||
return false;
|
||||
|
||||
switch (instr->sampler_dim) {
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_EXTERNAL:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
break;
|
||||
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
/* LOD can't be specified with TEXS_CUBE */
|
||||
if (instr->op == nir_texop_txl)
|
||||
return false;
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < instr->num_srcs; ++i) {
|
||||
if (instr->src[i].src_type != nir_tex_src_lod &&
|
||||
instr->src[i].src_type != nir_tex_src_coord)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Indices need to fit in provided bits */
|
||||
unsigned idx_bits = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE ? 2 : 3;
|
||||
if (MAX2(instr->sampler_index, instr->texture_index) >= (1 << idx_bits))
|
||||
return false;
|
||||
|
||||
int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod);
|
||||
if (lod_idx < 0)
|
||||
return true;
|
||||
|
||||
nir_src lod = instr->src[lod_idx].src;
|
||||
return nir_src_is_zero(lod);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_emit_tex(bi_builder *b, nir_tex_instr *instr)
|
||||
{
|
||||
/* If txf is used, we assume there is a valid sampler bound at index 0. Use
|
||||
* it for txf operations, since there may be no other valid samplers. This is
|
||||
* a workaround: txf does not require a sampler in NIR (so sampler_index is
|
||||
* undefined) but we need one in the hardware. This is ABI with the driver.
|
||||
*
|
||||
* On Valhall, as the descriptor table is encoded in the index, this should
|
||||
* be handled by the driver.
|
||||
*/
|
||||
if (!nir_tex_instr_need_sampler(instr) && b->shader->arch < 9)
|
||||
instr->sampler_index = 0;
|
||||
|
||||
if (b->shader->arch >= 9)
|
||||
bi_emit_tex_valhall(b, instr);
|
||||
else if (bi_is_simple_tex(instr))
|
||||
bi_emit_texs(b, instr);
|
||||
else
|
||||
bi_emit_texc(b, instr);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_emit_phi(bi_builder *b, nir_phi_instr *instr)
|
||||
{
|
||||
|
|
@ -4514,7 +3839,8 @@ bi_emit_instr(bi_builder *b, struct nir_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_instr_type_tex:
|
||||
bi_emit_tex(b, nir_instr_as_tex(instr));
|
||||
assert(b->shader->arch >= 9);
|
||||
bi_emit_tex_valhall(b, nir_instr_as_tex(instr));
|
||||
break;
|
||||
|
||||
case nir_instr_type_jump:
|
||||
|
|
|
|||
|
|
@ -943,8 +943,7 @@ bifrost_postprocess_nir(nir_shader *nir,
|
|||
&info->vs.needs_extended_fifo);
|
||||
}
|
||||
|
||||
if (pan_arch(gpu_id) >= 9)
|
||||
NIR_PASS(_, nir, pan_nir_lower_tex, gpu_id);
|
||||
NIR_PASS(_, nir, pan_nir_lower_tex, gpu_id);
|
||||
|
||||
/* Our OpenCL compiler (src/panfrost/clc/pan_compile.c) has a very weird and
|
||||
* suboptimal optimization pipeline that results in a lot of unoptimized
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue