diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index bcdf65ee792..85bf0c51ad3 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -772,6 +772,11 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state) case nir_intrinsic_cubeface_pan: case nir_intrinsic_cube_ssel_pan: case nir_intrinsic_cube_tsel_pan: + case nir_intrinsic_texs_2d_pan: + case nir_intrinsic_texs_cube_pan: + case nir_intrinsic_texc0_pan: + case nir_intrinsic_texc1_pan: + case nir_intrinsic_texc2_pan: case nir_intrinsic_atomic_counter_read: case nir_intrinsic_atomic_counter_read_deref: case nir_intrinsic_is_sparse_texels_resident: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 2aab55ef8f5..08292cee484 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1745,6 +1745,22 @@ intrinsic("cube_ssel_pan", [1, 1, 1], dest_comp=1, bit_sizes=[32], intrinsic("cube_tsel_pan", [1, 1, 1], dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER]) +# src = { x, y } +intrinsic("texs_2d_pan", [1, 1], dest_comp=4, bit_sizes=[16, 32], + indices=[DEST_TYPE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER]) + +# src = { s, t, face } +intrinsic("texs_cube_pan", [1, 1, 1], dest_comp=4, bit_sizes=[16, 32], + indices=[DEST_TYPE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER]) + +# src = { s, t, desc, sr0, sr1 } +intrinsic("texc0_pan", [1, 1, 1], dest_comp=4, bit_sizes=[16, 32], + indices=[DEST_TYPE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER]) +intrinsic("texc1_pan", [1, 1, 1, -1], dest_comp=4, bit_sizes=[16, 32], + indices=[DEST_TYPE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER]) +intrinsic("texc2_pan", [1, 1, 1, -1, -1], dest_comp=4, bit_sizes=[16, 32], + indices=[DEST_TYPE, FLAGS], flags=[CAN_ELIMINATE, CAN_REORDER]) + # Loads the sampler paramaters # src[] = { sampler_index } load("sampler_lod_parameters", [1], 
flags=[CAN_ELIMINATE, CAN_REORDER]) diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 1bd11c12eb9..499bb434ce0 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -2029,6 +2029,79 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) bi_split_def(b, &instr->def); break; + case nir_intrinsic_texs_2d_pan: { /* 2D TEXS path: src[0] and src[1] are the coordinate operands; explicit_lod, sampler_idx and texture_idx come from the FLAGS index. */ + struct pan_bi_tex_flags flags = + nir_intrinsic_pan_bi_tex_flags(instr); + + bi_instr *I = bi_texs_2d_to(b, instr->def.bit_size, + bi_def_index(&instr->def), + bi_src_index(&instr->src[0]), + bi_src_index(&instr->src[1]), + flags.explicit_lod, + flags.sampler_idx, + flags.texture_idx); + I->skip = flags.skip; + + bi_split_def(b, &instr->def); + break; + } + + case nir_intrinsic_texs_cube_pan: { /* Cube TEXS path: takes three sources and asserts explicit_lod is unset; bi_texs_cube_to is given no LOD argument here. */ + struct pan_bi_tex_flags flags = + nir_intrinsic_pan_bi_tex_flags(instr); + assert(!flags.explicit_lod); + + bi_instr *I = bi_texs_cube_to(b, instr->def.bit_size, + bi_def_index(&instr->def), + bi_src_index(&instr->src[0]), + bi_src_index(&instr->src[1]), + bi_src_index(&instr->src[2]), + flags.sampler_idx, + flags.texture_idx); + I->skip = flags.skip; + + bi_split_def(b, &instr->def); + break; + } + + case nir_intrinsic_texc0_pan: + case nir_intrinsic_texc1_pan: + case nir_intrinsic_texc2_pan: { /* Shared TEXC path: src[0..2] are cx, cy and desc; texc1 and texc2 also read src[3], and texc2 reads src[4]; their components are collected into sr (bi_null() when there are none). */ + bi_index cx = bi_src_index(&instr->src[0]); + bi_index cy = bi_src_index(&instr->src[1]); + bi_index desc = bi_src_index(&instr->src[2]); + struct pan_bi_tex_flags flags = + nir_intrinsic_pan_bi_tex_flags(instr); + + bi_index sr_comps[8]; /* NOTE(review): sr_count is not bounded against these 8 slots, while src[3] and src[4] are declared with component count -1 in nir_intrinsics.py (presumably any size); confirm producers never exceed 8 in total or add an assert. */ + unsigned sr_count = 0; + + if (instr->intrinsic != nir_intrinsic_texc0_pan) { + bi_index sr0 = bi_src_index(&instr->src[3]); + for (unsigned i = 0; i < nir_src_num_components(instr->src[3]); i++) + sr_comps[sr_count++] = bi_extract(b, sr0, i); + } + if (instr->intrinsic == nir_intrinsic_texc2_pan) { + bi_index sr1 = bi_src_index(&instr->src[4]); + for (unsigned i = 0; i < 
nir_src_num_components(instr->src[4]); i++) + sr_comps[sr_count++] = bi_extract(b, sr1, i); + } + + bi_index sr = bi_null(); + if (sr_count > 0) { + sr = bi_temp(b->shader); + bi_emit_collect_to(b, sr, sr_comps, sr_count); + } + + bi_instr *I = bi_texc_to(b, bi_def_index(&instr->def), sr, cx, cy, + desc, flags.explicit_lod, sr_count, 0); + I->register_format = bi_reg_fmt_for_nir(nir_intrinsic_dest_type(instr)); + I->skip = flags.skip; + + bi_split_def(b, &instr->def); + break; + } + case nir_intrinsic_load_pixel_coord: /* Vectorized load of the preloaded i16vec2 */ bi_mov_i32_to(b, dst, bi_preload(b, BI_PRELOAD_POSITION_XY)); diff --git a/src/panfrost/compiler/pan_nir.h b/src/panfrost/compiler/pan_nir.h index cc10d6aca7a..afe170a8298 100644 --- a/src/panfrost/compiler/pan_nir.h +++ b/src/panfrost/compiler/pan_nir.h @@ -85,6 +85,27 @@ bool pan_nir_lower_image_index(nir_shader *shader, bool pan_nir_lower_texel_buffer_fetch_index(nir_shader *shader, unsigned attrib_offset); +PRAGMA_DIAGNOSTIC_PUSH +PRAGMA_DIAGNOSTIC_ERROR(-Wpadded) +struct pan_bi_tex_flags { /* Texture flags packed into the 32-bit FLAGS index. NOTE(review): the 4-byte size relies on the bool and unsigned bit-fields sharing one storage unit, which the static_assert after the struct checks; confirm for every supported compiler. */ + bool skip : 1; + bool explicit_lod : 1; + unsigned _pad : 14; + unsigned sampler_idx : 8; + unsigned texture_idx : 8; +}; +PRAGMA_DIAGNOSTIC_POP +static_assert(sizeof(struct pan_bi_tex_flags) == 4, "Must fit in uint32_t"); + +static inline struct pan_bi_tex_flags +nir_intrinsic_pan_bi_tex_flags(const nir_intrinsic_instr *instr) +{ /* Copy the raw nir_intrinsic_flags() value into a struct pan_bi_tex_flags bit-for-bit. */ + uint32_t flags_u32 = nir_intrinsic_flags(instr); + struct pan_bi_tex_flags flags; + memcpy(&flags, &flags_u32, sizeof(flags)); + return flags; +} + PRAGMA_DIAGNOSTIC_PUSH PRAGMA_DIAGNOSTIC_ERROR(-Wpadded) struct pan_va_tex_flags {