diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 499bb434ce0..5b6cde93bca 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -3658,35 +3658,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) } } -/* Returns dimension with 0 special casing cubemaps. Shamelessly copied from - * Midgard */ -static unsigned -bifrost_tex_format(enum glsl_sampler_dim dim) -{ - switch (dim) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - return 1; - - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_MS: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_SUBPASS: - case GLSL_SAMPLER_DIM_SUBPASS_MS: - return 2; - - case GLSL_SAMPLER_DIM_3D: - return 3; - - case GLSL_SAMPLER_DIM_CUBE: - return 0; - - default: - UNREACHABLE("Unknown sampler dim type\n"); - } -} - static enum bi_dimension valhall_tex_dimension(enum glsl_sampler_dim dim) { @@ -3714,555 +3685,6 @@ valhall_tex_dimension(enum glsl_sampler_dim dim) } } -static enum bifrost_texture_format_full -bi_texture_format(nir_alu_type T, enum bi_clamp clamp) -{ - switch (T) { - case nir_type_float16: - return BIFROST_TEXTURE_FORMAT_F16 + clamp; - case nir_type_float32: - return BIFROST_TEXTURE_FORMAT_F32 + clamp; - case nir_type_uint16: - return BIFROST_TEXTURE_FORMAT_U16; - case nir_type_int16: - return BIFROST_TEXTURE_FORMAT_S16; - case nir_type_uint32: - return BIFROST_TEXTURE_FORMAT_U32; - case nir_type_int32: - return BIFROST_TEXTURE_FORMAT_S32; - default: - UNREACHABLE("Invalid type for texturing"); - } -} - -/* Array indices are specified as 32-bit uints, need to convert. In .z component - * from NIR */ -static bi_index -bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T) -{ - /* For (u)int we can just passthrough */ - nir_alu_type base = nir_alu_type_get_base_type(T); - if (base == nir_type_int || base == nir_type_uint) - return idx; - - /* Otherwise we convert */ - assert(T == nir_type_float32); - - /* OpenGL ES 3.2 specification section 8.14.2 ("Coordinate Wrapping and - * Texel Selection") defines the layer to be taken from clamp(RNE(r), - * 0, dt - 1). So we use round RTE, clamping is handled at the data - * structure level */ - - bi_instr *I = bi_f32_to_u32_to(b, bi_temp(b->shader), idx); - I->round = BI_ROUND_NONE; - return I->dest[0]; -} - -/* TEXC's explicit and bias LOD modes requires the LOD to be transformed to a - * 16-bit 8:8 fixed-point format. We lower as: - * - * F32_TO_S32(clamp(x, -16.0, +16.0) * 256.0) & 0xFFFF = - * MKVEC(F32_TO_S32(clamp(x * 1.0/16.0, -1.0, 1.0) * (16.0 * 256.0)), #0) - */ - -static bi_index -bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16) -{ - /* Precompute for constant LODs to avoid general constant folding */ - if (lod.type == BI_INDEX_CONSTANT) { - uint32_t raw = lod.value; - float x = fp16 ? _mesa_half_to_float(raw) : uif(raw); - int32_t s32 = CLAMP(x, -16.0f, 16.0f) * 256.0f; - return bi_imm_u32(s32 & 0xFFFF); - } - - /* Sort of arbitrary. Must be less than 128.0, greater than or equal to - * the max LOD (16 since we cap at 2^16 texture dimensions), and - * preferably small to minimize precision loss */ - const float max_lod = 16.0; - - bi_instr *fsat = - bi_fma_f32_to(b, bi_temp(b->shader), fp16 ? bi_half(lod, false) : lod, - bi_imm_f32(1.0f / max_lod), bi_negzero()); - - fsat->clamp = BI_CLAMP_CLAMP_M1_1; - - bi_index fmul = - bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f), bi_negzero()); - - return bi_mkvec_v2i16(b, bi_half(bi_f32_to_s32(b, fmul), false), - bi_imm_u16(0)); -} - -/* FETCH takes a 32-bit staging register containing the LOD as an integer in - * the bottom 16-bits and (if present) the cube face index in the top 16-bits. - * TODO: Cube face. - */ - -static bi_index -bi_emit_texc_lod_cube(bi_builder *b, bi_index lod) -{ - return bi_lshift_or_i32(b, lod, bi_zero(), bi_imm_u8(8)); -} - -/* The hardware specifies texel offsets and multisample indices together as a - * u8vec4 . By default all are zero, so if have either a - * nonzero texel offset or a nonzero multisample index, we build a u8vec4 with - * the bits we need and return that to be passed as a staging register. Else we - * return 0 to avoid allocating a data register when everything is zero. */ - -static bi_index -bi_emit_texc_offset_ms_index(bi_builder *b, nir_tex_instr *instr) -{ - bi_index dest = bi_zero(); - - int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset); - if (offs_idx >= 0 && !nir_src_is_zero(instr->src[offs_idx].src)) { - unsigned nr = nir_src_num_components(instr->src[offs_idx].src); - bi_index idx = bi_src_index(&instr->src[offs_idx].src); - dest = bi_mkvec_v4i8( - b, (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0), - (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0), - (nr > 2) ? bi_byte(bi_extract(b, idx, 2), 0) : bi_imm_u8(0), - bi_imm_u8(0)); - } - - int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index); - if (ms_idx >= 0 && !nir_src_is_zero(instr->src[ms_idx].src)) { - dest = bi_lshift_or_i32(b, bi_src_index(&instr->src[ms_idx].src), dest, - bi_imm_u8(24)); - } - - return dest; -} - -static void -bi_emit_cube_coord(bi_builder *b, bi_index coord, bi_index *face, bi_index *s, - bi_index *t) -{ - /* Compute max { |x|, |y|, |z| } */ - bi_index maxxyz = bi_temp(b->shader); - *face = bi_temp(b->shader); - - bi_index cx = bi_extract(b, coord, 0), cy = bi_extract(b, coord, 1), - cz = bi_extract(b, coord, 2); - - /* Use a pseudo op on Bifrost due to tuple restrictions */ - if (b->shader->arch <= 8) { - bi_cubeface_to(b, maxxyz, *face, cx, cy, cz); - } else { - bi_cubeface1_to(b, maxxyz, cx, cy, cz); - bi_cubeface2_v9_to(b, *face, cx, cy, cz); - } - - /* Select coordinates */ - bi_index ssel = - bi_cube_ssel(b, bi_extract(b, coord, 2), bi_extract(b, coord, 0), *face); - bi_index tsel = - bi_cube_tsel(b, bi_extract(b, coord, 1), bi_extract(b, coord, 2), *face); - - /* The OpenGL ES specification requires us to transform an input vector - * (x, y, z) to the coordinate, given the selected S/T: - * - * (1/2 ((s / max{x,y,z}) + 1), 1/2 ((t / max{x, y, z}) + 1)) - * - * We implement (s shown, t similar) in a form friendlier to FMA - * instructions, and clamp coordinates at the end for correct - * NaN/infinity handling: - * - * fsat(s * (0.5 * (1 / max{x, y, z})) + 0.5) - * - * Take the reciprocal of max{x, y, z} - */ - bi_index rcp = bi_frcp_f32(b, maxxyz); - - /* Calculate 0.5 * (1.0 / max{x, y, z}) */ - bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero()); - - /* Transform the coordinates */ - *s = bi_temp(b->shader); - *t = bi_temp(b->shader); - - bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f)); - bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f)); - - S->clamp = BI_CLAMP_CLAMP_0_1; - T->clamp = BI_CLAMP_CLAMP_0_1; -} - -/* Emits a cube map descriptor, returning lower 32-bits and putting upper - * 32-bits in passed pointer t. The packing of the face with the S coordinate - * exploits the redundancy of floating points with the range restriction of - * CUBEFACE output. - * - * struct cube_map_descriptor { - * float s : 29; - * unsigned face : 3; - * float t : 32; - * } - * - * Since the cube face index is preshifted, this is easy to pack with a bitwise - * MUX.i32 and a fixed mask, selecting the lower bits 29 from s and the upper 3 - * bits from face. - */ - -static bi_index -bi_emit_texc_cube_coord(bi_builder *b, bi_index coord, bi_index *t) -{ - bi_index face, s; - bi_emit_cube_coord(b, coord, &face, &s, t); - bi_index mask = bi_imm_u32(BITFIELD_MASK(29)); - return bi_mux_i32(b, s, face, mask, BI_MUX_BIT); -} - -/* Map to the main texture op used. Some of these (txd in particular) will - * lower to multiple texture ops with different opcodes (GRDESC_DER + TEX in - * sequence). We assume that lowering is handled elsewhere. - */ - -static enum bifrost_tex_op -bi_tex_op(nir_texop op) -{ - switch (op) { - case nir_texop_tex: - case nir_texop_txb: - case nir_texop_txl: - case nir_texop_txd: - return BIFROST_TEX_OP_TEX; - case nir_texop_txf: - case nir_texop_txf_ms: - case nir_texop_tg4: - return BIFROST_TEX_OP_FETCH; - case nir_texop_lod: - return BIFROST_TEX_OP_GRDESC; - case nir_texop_txs: - case nir_texop_query_levels: - case nir_texop_texture_samples: - case nir_texop_samples_identical: - UNREACHABLE("should've been lowered"); - default: - UNREACHABLE("unsupported tex op"); - } -} - -/* Data registers required by texturing in the order they appear. All are - * optional, the texture operation descriptor determines which are present. - * Note since 3D arrays are not permitted at an API level, Z_COORD and - * ARRAY/SHADOW are exlusive, so TEXC in practice reads at most 8 registers */ - -enum bifrost_tex_dreg { - BIFROST_TEX_DREG_Z_COORD = 0, - BIFROST_TEX_DREG_Y_DELTAS = 1, - BIFROST_TEX_DREG_LOD = 2, - BIFROST_TEX_DREG_GRDESC_HI = 3, - BIFROST_TEX_DREG_SHADOW = 4, - BIFROST_TEX_DREG_ARRAY = 5, - BIFROST_TEX_DREG_OFFSETMS = 6, - BIFROST_TEX_DREG_SAMPLER = 7, - BIFROST_TEX_DREG_TEXTURE = 8, - BIFROST_TEX_DREG_COUNT, -}; - -static void -bi_emit_texc(bi_builder *b, nir_tex_instr *instr) -{ - assert((instr->op != nir_texop_txf || - instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) && - "Texel buffers should already have been lowered"); - - struct bifrost_texture_operation desc = { - .op = bi_tex_op(instr->op), - .offset_or_bias_disable = false, /* TODO */ - .shadow_or_clamp_disable = instr->is_shadow, - .array = instr->is_array && instr->op != nir_texop_lod, - .dimension = bifrost_tex_format(instr->sampler_dim), - .format = bi_texture_format(instr->dest_type | instr->def.bit_size, - BI_CLAMP_NONE), /* TODO */ - .mask = 0xF, - }; - - switch (desc.op) { - case BIFROST_TEX_OP_TEX: - desc.lod_or_fetch = BIFROST_LOD_MODE_COMPUTE; - break; - case BIFROST_TEX_OP_FETCH: - desc.lod_or_fetch = (enum bifrost_lod_mode)( - instr->op == nir_texop_tg4 - ? BIFROST_TEXTURE_FETCH_GATHER4_R + instr->component - : BIFROST_TEXTURE_FETCH_TEXEL); - break; - case BIFROST_TEX_OP_GRDESC: - break; - default: - UNREACHABLE("texture op unsupported"); - } - - /* 32-bit indices to be allocated as consecutive staging registers */ - bi_index dregs[BIFROST_TEX_DREG_COUNT] = {}; - bi_index cx = bi_null(), cy = bi_null(); - bi_index ddx = bi_null(); - bi_index ddy = bi_null(); - - for (unsigned i = 0; i < instr->num_srcs; ++i) { - bi_index index = bi_src_index(&instr->src[i].src); - unsigned sz = nir_src_bit_size(instr->src[i].src); - unsigned components = nir_src_num_components(instr->src[i].src); - ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i); - nir_alu_type T = base | sz; - - switch (instr->src[i].src_type) { - case nir_tex_src_coord: - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - cx = bi_emit_texc_cube_coord(b, index, &cy); - } else { - /* Copy XY (for 2D+) or XX (for 1D) */ - cx = bi_extract(b, index, 0); - cy = bi_extract(b, index, MIN2(1, components - 1)); - - assert(components >= 1 && components <= 3); - - if (components == 3 && !desc.array) { - /* 3D */ - dregs[BIFROST_TEX_DREG_Z_COORD] = bi_extract(b, index, 2); - } - } - - if (desc.array) { - dregs[BIFROST_TEX_DREG_ARRAY] = bi_emit_texc_array_index( - b, bi_extract(b, index, components - 1), T); - } - - break; - - case nir_tex_src_lod: - if (desc.op == BIFROST_TEX_OP_TEX && - nir_src_is_zero(instr->src[i].src)) { - desc.lod_or_fetch = BIFROST_LOD_MODE_ZERO; - } else if (desc.op == BIFROST_TEX_OP_TEX) { - assert(base == nir_type_float); - - assert(sz == 16 || sz == 32); - dregs[BIFROST_TEX_DREG_LOD] = - bi_emit_texc_lod_88(b, index, sz == 16); - desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT; - } else { - assert(desc.op == BIFROST_TEX_OP_FETCH); - assert(base == nir_type_uint || base == nir_type_int); - assert(sz == 16 || sz == 32); - - dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, index); - } - - break; - - case nir_tex_src_ddx: - ddx = index; - break; - - case nir_tex_src_ddy: - ddy = index; - break; - - case nir_tex_src_bias: - /* Upper 16-bits interpreted as a clamp, leave zero */ - assert(desc.op == BIFROST_TEX_OP_TEX); - assert(base == nir_type_float); - assert(sz == 16 || sz == 32); - dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_88(b, index, sz == 16); - desc.lod_or_fetch = BIFROST_LOD_MODE_BIAS; - break; - - case nir_tex_src_ms_index: - case nir_tex_src_offset: - if (desc.offset_or_bias_disable) - break; - - dregs[BIFROST_TEX_DREG_OFFSETMS] = - bi_emit_texc_offset_ms_index(b, instr); - if (!bi_is_equiv(dregs[BIFROST_TEX_DREG_OFFSETMS], bi_zero())) - desc.offset_or_bias_disable = true; - break; - - case nir_tex_src_comparator: - dregs[BIFROST_TEX_DREG_SHADOW] = index; - break; - - case nir_tex_src_texture_offset: - dregs[BIFROST_TEX_DREG_TEXTURE] = index; - break; - - case nir_tex_src_sampler_offset: - dregs[BIFROST_TEX_DREG_SAMPLER] = index; - break; - - default: - UNREACHABLE("Unhandled src type in texc emit"); - } - } - - if (desc.op == BIFROST_TEX_OP_FETCH && - bi_is_null(dregs[BIFROST_TEX_DREG_LOD])) { - dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, bi_zero()); - } - - /* Choose an index mode */ - - bool direct_tex = bi_is_null(dregs[BIFROST_TEX_DREG_TEXTURE]); - bool direct_samp = bi_is_null(dregs[BIFROST_TEX_DREG_SAMPLER]); - bool direct = direct_tex && direct_samp; - - desc.immediate_indices = - direct && (instr->sampler_index < 16 && instr->texture_index < 128); - - if (desc.immediate_indices) { - desc.sampler_index_or_mode = instr->sampler_index; - desc.index = instr->texture_index; - } else { - unsigned mode = 0; - - if (direct && instr->sampler_index == instr->texture_index && - instr->sampler_index < 128) { - mode = BIFROST_INDEX_IMMEDIATE_SHARED; - desc.index = instr->texture_index; - } else if (direct && instr->sampler_index < 128) { - mode = BIFROST_INDEX_IMMEDIATE_SAMPLER; - desc.index = instr->sampler_index; - dregs[BIFROST_TEX_DREG_TEXTURE] = - bi_mov_i32(b, bi_imm_u32(instr->texture_index)); - } else if (direct_tex && instr->texture_index < 128) { - mode = BIFROST_INDEX_IMMEDIATE_TEXTURE; - desc.index = instr->texture_index; - - if (direct_samp) { - dregs[BIFROST_TEX_DREG_SAMPLER] = - bi_mov_i32(b, bi_imm_u32(instr->sampler_index)); - } - } else if (direct_samp && instr->sampler_index < 128) { - mode = BIFROST_INDEX_IMMEDIATE_SAMPLER; - desc.index = instr->sampler_index; - - if (direct_tex) { - dregs[BIFROST_TEX_DREG_TEXTURE] = - bi_mov_i32(b, bi_imm_u32(instr->texture_index)); - } - } else { - mode = BIFROST_INDEX_REGISTER; - - if (direct_tex) { - dregs[BIFROST_TEX_DREG_TEXTURE] = - bi_mov_i32(b, bi_imm_u32(instr->texture_index)); - } - - if (direct_samp) { - dregs[BIFROST_TEX_DREG_SAMPLER] = - bi_mov_i32(b, bi_imm_u32(instr->sampler_index)); - } - } - - mode |= (BIFROST_TEXTURE_OPERATION_SINGLE << 2); - desc.sampler_index_or_mode = mode; - } - - if (!bi_is_null(ddx) || !bi_is_null(ddy)) { - assert(!bi_is_null(ddx) && !bi_is_null(ddy)); - struct bifrost_texture_operation gropdesc = { - .sampler_index_or_mode = desc.sampler_index_or_mode, - .index = desc.index, - .immediate_indices = desc.immediate_indices, - .op = BIFROST_TEX_OP_GRDESC_DER, - .offset_or_bias_disable = true, - .shadow_or_clamp_disable = true, - .array = false, - .dimension = desc.dimension, - .format = desc.format, - .mask = desc.mask, - }; - - unsigned coords_comp_count = - instr->coord_components - - (instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE); - bi_index derivs[4]; - unsigned sr_count = 0; - - if (coords_comp_count > 2) - derivs[sr_count++] = bi_extract(b, ddx, 2); - derivs[sr_count++] = bi_extract(b, ddy, 0); - if (coords_comp_count > 1) - derivs[sr_count++] = bi_extract(b, ddy, 1); - if (coords_comp_count > 2) - derivs[sr_count++] = bi_extract(b, ddy, 2); - - bi_index derivs_packed = bi_temp(b->shader); - bi_make_vec_to(b, derivs_packed, derivs, NULL, sr_count, 32); - bi_index grdesc = bi_temp(b->shader); - bi_instr *I = - bi_texc_to(b, grdesc, derivs_packed, bi_extract(b, ddx, 0), - coords_comp_count > 1 ? bi_extract(b, ddx, 1) : bi_zero(), - bi_imm_u32(gropdesc.packed), true, sr_count, 0); - I->register_format = BI_REGISTER_FORMAT_U32; - - bi_emit_cached_split_i32(b, grdesc, 4); - - dregs[BIFROST_TEX_DREG_LOD] = bi_extract(b, grdesc, 0); - desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT; - } - - /* Allocate staging registers contiguously by compacting the array. */ - unsigned sr_count = 0; - - for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) { - if (!bi_is_null(dregs[i])) - dregs[sr_count++] = dregs[i]; - } - - unsigned res_size = instr->def.bit_size == 16 ? 2 : 4; - - bi_index sr = sr_count ? bi_temp(b->shader) : bi_null(); - - if (sr_count) - bi_emit_collect_to(b, sr, dregs, sr_count); - - if (instr->op == nir_texop_lod) { - assert(instr->def.num_components == 2 && instr->def.bit_size == 32); - - bi_index res[2]; - for (unsigned i = 0; i < 2; i++) { - desc.shadow_or_clamp_disable = i != 0; - - bi_index grdesc = bi_temp(b->shader); - bi_instr *I = bi_texc_to(b, grdesc, sr, cx, cy, - bi_imm_u32(desc.packed), false, sr_count, 0); - I->register_format = BI_REGISTER_FORMAT_U32; - - bi_emit_cached_split_i32(b, grdesc, 4); - - bi_index lod = bi_s16_to_f32(b, bi_half(bi_extract(b, grdesc, 0), 0)); - - lod = bi_fmul_f32(b, lod, bi_imm_f32(1.0f / 256)); - - if (i == 0) - lod = bi_fround_f32(b, lod, BI_ROUND_NONE); - - res[i] = lod; - } - - bi_make_vec_to(b, bi_def_index(&instr->def), res, NULL, 2, 32); - return; - } - - bi_index dst = bi_temp(b->shader); - - bi_instr *I = - bi_texc_to(b, dst, sr, cx, cy, bi_imm_u32(desc.packed), - !nir_tex_instr_has_implicit_derivative(instr), sr_count, 0); - I->register_format = bi_reg_fmt_for_nir(instr->dest_type); - - bi_index w[4] = {bi_null(), bi_null(), bi_null(), bi_null()}; - bi_emit_split_i32(b, w, dst, res_size); - bi_emit_collect_to(b, bi_def_index(&instr->def), w, - DIV_ROUND_UP(instr->def.num_components * res_size, 4)); -} - static void bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *tex) { @@ -4351,103 +3773,6 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *tex) tex->def.num_components, tex->def.bit_size); } -/* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D/cube - * textures with sufficiently small immediate indices. Anything else - * needs a complete texture op. */ - -static void -bi_emit_texs(bi_builder *b, nir_tex_instr *instr) -{ - int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord); - assert(coord_idx >= 0); - bi_index coords = bi_src_index(&instr->src[coord_idx].src); - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - bi_index face, s, t; - bi_emit_cube_coord(b, coords, &face, &s, &t); - - bi_texs_cube_to(b, instr->def.bit_size, bi_def_index(&instr->def), s, t, - face, instr->sampler_index, instr->texture_index); - } else { - bi_texs_2d_to(b, instr->def.bit_size, bi_def_index(&instr->def), - bi_extract(b, coords, 0), bi_extract(b, coords, 1), - instr->op != nir_texop_tex, /* zero LOD */ - instr->sampler_index, instr->texture_index); - } - - bi_split_def(b, &instr->def); -} - -static bool -bi_is_simple_tex(nir_tex_instr *instr) -{ - if (instr->op != nir_texop_tex && instr->op != nir_texop_txl) - return false; - - if (instr->dest_type != nir_type_float32 && - instr->dest_type != nir_type_float16) - return false; - - if (instr->is_shadow || instr->is_array) - return false; - - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_EXTERNAL: - case GLSL_SAMPLER_DIM_RECT: - break; - - case GLSL_SAMPLER_DIM_CUBE: - /* LOD can't be specified with TEXS_CUBE */ - if (instr->op == nir_texop_txl) - return false; - break; - - default: - return false; - } - - for (unsigned i = 0; i < instr->num_srcs; ++i) { - if (instr->src[i].src_type != nir_tex_src_lod && - instr->src[i].src_type != nir_tex_src_coord) - return false; - } - - /* Indices need to fit in provided bits */ - unsigned idx_bits = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE ? 2 : 3; - if (MAX2(instr->sampler_index, instr->texture_index) >= (1 << idx_bits)) - return false; - - int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod); - if (lod_idx < 0) - return true; - - nir_src lod = instr->src[lod_idx].src; - return nir_src_is_zero(lod); -} - -static void -bi_emit_tex(bi_builder *b, nir_tex_instr *instr) -{ - /* If txf is used, we assume there is a valid sampler bound at index 0. Use - * it for txf operations, since there may be no other valid samplers. This is - * a workaround: txf does not require a sampler in NIR (so sampler_index is - * undefined) but we need one in the hardware. This is ABI with the driver. - * - * On Valhall, as the descriptor table is encoded in the index, this should - * be handled by the driver. - */ - if (!nir_tex_instr_need_sampler(instr) && b->shader->arch < 9) - instr->sampler_index = 0; - - if (b->shader->arch >= 9) - bi_emit_tex_valhall(b, instr); - else if (bi_is_simple_tex(instr)) - bi_emit_texs(b, instr); - else - bi_emit_texc(b, instr); -} - static void bi_emit_phi(bi_builder *b, nir_phi_instr *instr) { @@ -4514,7 +3839,8 @@ bi_emit_instr(bi_builder *b, struct nir_instr *instr) break; case nir_instr_type_tex: - bi_emit_tex(b, nir_instr_as_tex(instr)); + assert(b->shader->arch >= 9); + bi_emit_tex_valhall(b, nir_instr_as_tex(instr)); break; case nir_instr_type_jump: diff --git a/src/panfrost/compiler/bifrost/bifrost_nir.c b/src/panfrost/compiler/bifrost/bifrost_nir.c index 3d38df4c704..b1da1e54fbb 100644 --- a/src/panfrost/compiler/bifrost/bifrost_nir.c +++ b/src/panfrost/compiler/bifrost/bifrost_nir.c @@ -943,8 +943,7 @@ bifrost_postprocess_nir(nir_shader *nir, &info->vs.needs_extended_fifo); } - if (pan_arch(gpu_id) >= 9) - NIR_PASS(_, nir, pan_nir_lower_tex, gpu_id); + NIR_PASS(_, nir, pan_nir_lower_tex, gpu_id); /* Our OpenCL compiler (src/panfrost/clc/pan_compile.c) has a very weird and * suboptimal optimization pipeline that results in a lot of unoptimized