diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 50759dc4275..fef2f0fd0ac 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2681,3 +2681,26 @@ intrinsic("enqueue_node_payloads", src_comp=[-1]) # Returns true if it has been called for every payload. intrinsic("finalize_incoming_node_payload", src_comp=[-1], dest_comp=1) + +# Loads the texture/sampler state words for a given descriptor. +intrinsic("load_tex_state_pco", dest_comp=0, indices=[DESC_SET, BINDING, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32]) +intrinsic("load_smp_state_pco", dest_comp=0, indices=[DESC_SET, BINDING, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32]) + +# Loads the texture/sampler metadata for a given descriptor. +intrinsic("load_tex_meta_pco", dest_comp=0, indices=[DESC_SET, BINDING, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32]) +intrinsic("load_smp_meta_pco", dest_comp=0, indices=[DESC_SET, BINDING, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32]) + +index("uint16_t", "smp_flags_pco") + +# smp_pco(data, tex_state, smp_state) +# Performs a standard sampling operation with the given data and state words. +# Outputs between 1-4 comps. +intrinsic("smp_pco", src_comp=[16, 4, 4], dest_comp=0, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32]) + +# smp_coeffs_pco(data, tex_state, smp_state) +# Returns the calculated sampling coefficients for the given data and state words. +intrinsic("smp_coeffs_pco", src_comp=[16, 4, 4], dest_comp=8, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32]) + +# alphatst_pco(data, comparator, comparison op) +# Performs an alpha test on the given parameters, returning float 0/1 depending on the comparison result. +intrinsic("alphatst_pco", src_comp=[1, 1, 1], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32]) diff --git a/src/imagination/include/hwdef/rogue_hw_defs.h b/src/imagination/include/hwdef/rogue_hw_defs.h index e6c51311cef..83c1b8cbd97 100644 --- a/src/imagination/include/hwdef/rogue_hw_defs.h +++ b/src/imagination/include/hwdef/rogue_hw_defs.h @@ -105,6 +105,8 @@ /* Number of TEXSTATE_SAMPLER state words that need setting up. */ #define ROGUE_NUM_TEXSTATE_SAMPLER_WORDS 2U +#define ROGUE_NUM_TEXSTATE_DWORDS 4U + /* 12 dwords reserved for shared register management. The first dword is the * number of shared register blocks to reload. Should be a multiple of 4 dwords, * size in bytes. @@ -173,4 +175,17 @@ enum { ROGUE_USC_COEFFICIENT_SET_SIZE, }; +enum { + ROGUE_SMP_COEFF_UFRAC = 0, + ROGUE_SMP_COEFF_VFRAC, + ROGUE_SMP_COEFF_SFRAC, + ROGUE_SMP_COEFF_LOD_DVAL_POST_CLAMP, + ROGUE_SMP_COEFF_LOD_DVAL_PRE_CLAMP, + ROGUE_SMP_COEFF_TFRAC_POST_CLAMP, + ROGUE_SMP_COEFF_TFRAC_PRE_CLAMP, + ROGUE_SMP_COEFF_COUNT, +}; + +#define ROGUE_SMP_MAX_DATA_WORDS 18U + #endif /* ROGUE_HW_DEFS_H */ diff --git a/src/imagination/pco/meson.build b/src/imagination/pco/meson.build index e500502fd48..4f18154e56d 100644 --- a/src/imagination/pco/meson.build +++ b/src/imagination/pco/meson.build @@ -18,6 +18,7 @@ libpowervr_compiler_files = files( 'pco_nir.c', 'pco_nir_compute.c', 'pco_nir_pvfio.c', + 'pco_nir_tex.c', 'pco_nir_vk.c', 'pco_opt.c', 'pco_print.c', diff --git a/src/imagination/pco/pco_data.h b/src/imagination/pco/pco_data.h index 8e5ff7750df..b89eb146180 100644 --- a/src/imagination/pco/pco_data.h +++ b/src/imagination/pco/pco_data.h @@ -74,6 +74,26 @@ typedef struct _pco_cs_data { unsigned workgroup_size[3]; /** Workgroup size. */ } pco_cs_data; +/** PCO image descriptor metadata. */ +enum pco_image_meta { + PCO_IMAGE_META_LAYER_SIZE, + PCO_IMAGE_META_RSVD0, + PCO_IMAGE_META_RSVD1, + PCO_IMAGE_META_RSVD2, + + PCO_IMAGE_META_COUNT, +}; + +/** PCO sampler descriptor metadata. */ +enum pco_sampler_meta { + PCO_SAMPLER_META_COMPARE_OP, + PCO_SAMPLER_META_RSVD0, + PCO_SAMPLER_META_RSVD1, + PCO_SAMPLER_META_RSVD2, + + PCO_SAMPLER_META_COUNT, +}; + /** PCO descriptor binding data. */ typedef struct _pco_binding_data { pco_range range; /** Descriptor location range. */ diff --git a/src/imagination/pco/pco_internal.h b/src/imagination/pco/pco_internal.h index 4a02e35d689..9b6a25a49b5 100644 --- a/src/imagination/pco/pco_internal.h +++ b/src/imagination/pco/pco_internal.h @@ -1522,6 +1522,28 @@ static inline bool pco_should_print_binary(pco_shader *shader) return true; } +/* Interface with NIR. */ +typedef union PACKED _pco_smp_flags { + struct PACKED { + unsigned dim : 2; + bool proj : 1; + bool fcnorm : 1; + bool nncoords : 1; + enum pco_lod_mode lod_mode : 2; + bool pplod : 1; + bool tao : 1; + bool soo : 1; + bool sno : 1; + bool array : 1; + bool integer : 1; + unsigned pad : 3; + }; + + uint16_t _; +} pco_smp_flags; +static_assert(sizeof(pco_smp_flags) == sizeof(uint16_t), + "sizeof(pco_smp_flags) != sizeof(uint16_t)"); + /* PCO IR passes. */ bool pco_const_imms(pco_shader *shader); bool pco_bool(pco_shader *shader); @@ -1534,6 +1556,7 @@ bool pco_legalize(pco_shader *shader); bool pco_nir_compute_instance_check(nir_shader *shader); bool pco_nir_lower_algebraic(nir_shader *shader); bool pco_nir_lower_algebraic_late(nir_shader *shader); +bool pco_nir_lower_tex(nir_shader *shader, pco_common_data *common); bool pco_nir_lower_vk(nir_shader *shader, pco_common_data *common); bool pco_nir_pfo(nir_shader *shader, pco_fs_data *fs); bool pco_nir_point_size(nir_shader *shader); diff --git a/src/imagination/pco/pco_isa.py b/src/imagination/pco/pco_isa.py index 0355aa8b2e9..724dcf94028 100644 --- a/src/imagination/pco/pco_isa.py +++ b/src/imagination/pco/pco_isa.py @@ -1820,8 +1820,8 @@ F_SBMODE = field_enum_type( name='sbmode', num_bits=2, elems=[ ('none', 0b00), - ('data', 0b01), - ('info', 0b10), + ('rawdata', 0b01), + ('coeffs', 0b10), ('both', 0b11), ]) diff --git a/src/imagination/pco/pco_map.py b/src/imagination/pco/pco_map.py index 93e07b5ff48..b19c4d6b6d9 100644 --- a/src/imagination/pco/pco_map.py +++ b/src/imagination/pco/pco_map.py @@ -310,6 +310,31 @@ enum_map(OM_FRED_PART.t, F_RED_PART, [ ('b', 'b'), ]) +enum_map(OM_DIM.t, F_DMN, [ + ('1d', '1d'), + ('2d', '2d'), + ('3d', '3d'), +]) + +enum_map(OM_LOD_MODE.t, F_LODM, [ + ('normal', 'normal'), + ('bias', 'bias'), + ('replace', 'replace'), + ('gradients', 'gradients'), +]) + +enum_map(OM_SB_MODE.t, F_SBMODE, [ + ('none', 'none'), + ('rawdata', 'rawdata'), + ('coeffs', 'coeffs'), + ('both', 'both'), +]) + +enum_map(OM_SCHEDSWAP.t, F_SCHED_CTRL, [ + ('none', 'none'), + ('swap', 'swap'), +]) + class OpRef(object): def __init__(self, ref_type, index, mods): self.type = ref_type @@ -1320,6 +1345,75 @@ encode_map(O_ATOMIC, op_ref_maps=[('backend', [['s0', 's1', 's2', 's3', 's4', 's5']], ['drc', ['s0', 's1', 's2', 's3', 's4', 's5']])] ) +encode_map(O_SMP, + encodings=[ + (I_SMP_EXTAB, [ + ('fcnorm', OM_FCNORM), + ('drc', ('pco_ref_get_drc', SRC(0))), + ('dmn', OM_DIM), + ('chan', ('pco_ref_get_imm', SRC(5))), + ('lodm', OM_LOD_MODE), + ('pplod', OM_PPLOD), + ('proj', OM_PROJ), + ('sbmode', OM_SB_MODE), + ('nncoords', OM_NNCOORDS), + ('sno', OM_SNO), + ('soo', OM_SOO), + ('tao', OM_TAO), + ('f16', OM_F16), + ('swap', OM_SCHEDSWAP), + ('cachemode_ld', OM_MCU_CACHE_MODE_LD) + ]), + (I_SMP_EXTA, [ + ('fcnorm', OM_FCNORM), + ('drc', ('pco_ref_get_drc', SRC(0))), + ('dmn', OM_DIM), + ('chan', ('pco_ref_get_imm', SRC(5))), + ('lodm', OM_LOD_MODE), + ('pplod', OM_PPLOD), + ('proj', OM_PROJ), + ('sbmode', OM_SB_MODE), + ('nncoords', OM_NNCOORDS), + ('sno', OM_SNO), + ('soo', OM_SOO), + ('tao', OM_TAO) + ], [ + (OM_F16, '== false'), + (OM_SCHEDSWAP, '== PCO_SCHEDSWAP_NONE'), + (OM_MCU_CACHE_MODE_LD, '== PCO_CACHEMODE_LD_NORMAL') + ]), + (I_SMP_BRIEF, [ + ('fcnorm', OM_FCNORM), + ('drc', ('pco_ref_get_drc', SRC(0))), + ('dmn', OM_DIM), + ('chan', ('pco_ref_get_imm', SRC(5))), + ('lodm', OM_LOD_MODE) + ], [ + (OM_F16, '== false'), + (OM_SCHEDSWAP, '== PCO_SCHEDSWAP_NONE'), + (OM_MCU_CACHE_MODE_LD, '== PCO_CACHEMODE_LD_NORMAL'), + (OM_PPLOD, '== false'), + (OM_PROJ, '== false'), + (OM_SB_MODE, '== PCO_SB_MODE_NONE'), + (OM_NNCOORDS, '== false'), + (OM_SNO, '== false'), + (OM_SOO, '== false'), + (OM_TAO, '== false') + ]) + ], + op_ref_maps=[('backend', ['s4'], ['drc', 's0', 's1', 's2', ['s3', '_'], 'imm'])] +) + +encode_map(O_ALPHATST, + encodings=[ + (I_VISTEST_ATST, [ + ('pwen', ('!pco_ref_is_null', DEST(0))), + ('ifb', True) + ]) + ], + op_ref_maps=[('backend', [['p0', '_']], ['drc', 's0', 's1', 's2'])] +) + encode_map(O_BBYP0BM, encodings=[ (I_PHASE0_SRC, [ @@ -2088,6 +2182,38 @@ group_map(O_CSEL, ] ) +group_map(O_PSEL, + hdr=(I_IGRP_HDR_MAIN, [ + ('oporg', 'p0_p1_p2'), + ('olchk', OM_OLCHK), + ('w1p', False), + ('w0p', True), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', OM_RPT) + ]), + enc_ops=[ + ('0', O_IMADD64, ['ft0', 'fte'], ['pco_zero', 'pco_zero', 'pco_zero', 'is0', SRC(0)]), + ('1', O_MBYP, ['ft1'], [SRC(1)]), + ('2_tst', O_TST, ['ftt', '_'], ['is1', '_'], [(OM_TST_OP_MAIN, 'zero'), (OM_TST_TYPE_MAIN, 'u32'), (OM_PHASE2END, True)]), + ('2_mov', O_MOVC, [DEST(0), '_'], ['ftt', SRC(2), 'is4', '_', '_']) + ], + srcs=[ + ('s[0]', ('0', SRC(0)), 's0'), + ('s[1]', ('0', SRC(1)), 's1'), + ('s[2]', ('0', SRC(2)), 's2'), + ('s[3]', ('1', SRC(0)), 's3'), + ('s[4]', ('2_mov', SRC(1)), 'fte'), + ], + iss=[ + ('is[0]', 's4'), + ('is[1]', 'ft0'), + ('is[4]', 'ft1'), + ], + dests=[('w[0]', ('2_mov', DEST(0)), 'w0')] +) + group_map(O_PSEL_TRIG, hdr=(I_IGRP_HDR_MAIN, [ ('oporg', 'p0_p1_p2'), @@ -2535,6 +2661,46 @@ group_map(O_ATOMIC, ] ) +group_map(O_SMP, + hdr=(I_IGRP_HDR_MAIN, [ + ('oporg', 'be'), + ('olchk', OM_OLCHK), + ('w1p', False), + ('w0p', False), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', False), + ('rpt', 1) + ]), + enc_ops=[('backend', O_SMP)], + srcs=[ + ('s[0]', ('backend', SRC(1)), 's0'), + ('s[1]', ('backend', SRC(2)), 's1'), + ('s[2]', ('backend', SRC(3)), 's2'), + ('s[3]', ('backend', SRC(4)), 's3'), + ('s[4]', ('backend', DEST(0)), 's4') + ] +) + +group_map(O_ALPHATST, + hdr=(I_IGRP_HDR_MAIN, [ + ('oporg', 'be'), + ('olchk', OM_OLCHK), + ('w1p', False), + ('w0p', False), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', OM_ATOM), + ('rpt', 1) + ]), + enc_ops=[('backend', O_ALPHATST)], + srcs=[ + ('s[0]', ('backend', SRC(1)), 's0'), + ('s[1]', ('backend', SRC(2)), 's1'), + ('s[2]', ('backend', SRC(3)), 's2'), + ] +) + group_map(O_MOVI32, hdr=(I_IGRP_HDR_BITWISE, [ ('opcnt', 'p0'), diff --git a/src/imagination/pco/pco_nir.c b/src/imagination/pco/pco_nir.c index 6ec773c574f..4f57c86829e 100644 --- a/src/imagination/pco/pco_nir.c +++ b/src/imagination/pco/pco_nir.c @@ -265,6 +265,9 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data) nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out); + NIR_PASS(_, nir, nir_lower_tex, &(nir_lower_tex_options){}); + NIR_PASS(_, nir, pco_nir_lower_tex, &data->common); + if (nir->info.stage == MESA_SHADER_FRAGMENT) { NIR_PASS(_, nir, pco_nir_pfo, &data->fs); } else if (nir->info.stage == MESA_SHADER_VERTEX) { diff --git a/src/imagination/pco/pco_nir_tex.c b/src/imagination/pco/pco_nir_tex.c new file mode 100644 index 00000000000..afa39a97745 --- /dev/null +++ b/src/imagination/pco/pco_nir_tex.c @@ -0,0 +1,568 @@ +/* + * Copyright © 2025 Imagination Technologies Ltd. + * + * SPDX-License-Identifier: MIT + */ + +/** + * \file pco_nir_tex.c + * + * \brief PCO NIR texture/sampler lowering passes. + */ + +#include "hwdef/rogue_hw_defs.h" +#include "nir.h" +#include "nir_builder.h" +#include "nir_builtin_builder.h" +#include "pco.h" +#include "pco_builder.h" +#include "pco_internal.h" +#include "util/macros.h" + +#include +#include +#include + +/* State word unpacking helpers. */ +#define STATE_UNPACK(b, state_word, word, start_bit, num_bits) \ + nir_ubitfield_extract_imm(b, state_word[word], start_bit, num_bits) + +#define STATE_UNPACK_ADD(b, state_word, word, start_bit, num_bits, val) \ + nir_iadd_imm(b, STATE_UNPACK(b, state_word, word, start_bit, num_bits), val) + +#define STATE_UNPACK_SHIFT(b, state_word, word, start_bit, num_bits, val) \ + nir_ishl(b, \ + nir_imm_int(b, val), \ + STATE_UNPACK(b, state_word, word, start_bit, num_bits)) + +static inline nir_def *get_src_def(nir_tex_instr *tex, + nir_tex_src_type src_type) +{ + int src_idx = nir_tex_instr_src_index(tex, src_type); + return src_idx >= 0 ? tex->src[src_idx].src.ssa : NULL; +} + +/** + * \brief Lowers a basic texture query (no sampling required). + * + * \param[in] b NIR builder. + * \param[in] tex NIR texture instruction. + * \param[in] tex_desc_set Texture descriptor set. + * \param[in] tex_binding Texture binding. + * \param[in] common Shader common data. + * \return The replacement/lowered def. + */ +static nir_def *lower_tex_query_basic(nir_builder *b, + nir_tex_instr *tex, + unsigned tex_desc_set, + unsigned tex_binding, + pco_common_data *common) +{ + /* Load texture state words. */ + nir_def *tex_state = nir_load_tex_state_pco(b, + ROGUE_NUM_TEXSTATE_DWORDS, + .desc_set = tex_desc_set, + .binding = tex_binding); + + nir_def *tex_state_word[] = { + [0] = nir_channel(b, tex_state, 0), + [1] = nir_channel(b, tex_state, 1), + [2] = nir_channel(b, tex_state, 2), + [3] = nir_channel(b, tex_state, 3), + }; + + switch (tex->op) { + case nir_texop_query_levels: + return STATE_UNPACK(b, tex_state_word, 2, 0, 4); + + case nir_texop_texture_samples: + return STATE_UNPACK_SHIFT(b, tex_state_word, 1, 30, 2, 1); + + case nir_texop_txs: { + unsigned num_comps = tex->def.num_components; + if (tex->is_array) + --num_comps; + + nir_def *size_comps[] = { + [0] = STATE_UNPACK_ADD(b, tex_state_word, 1, 2, 14, 1), + [1] = STATE_UNPACK_ADD(b, tex_state_word, 1, 16, 14, 1), + [2] = STATE_UNPACK_ADD(b, tex_state_word, 2, 4, 11, 1), + }; + + nir_def *base_level = STATE_UNPACK(b, tex_state_word, 3, 28, 4); + nir_def *lod = get_src_def(tex, nir_tex_src_lod); + assert(lod); + lod = nir_iadd(b, lod, base_level); + + for (unsigned c = 0; c < num_comps; ++c) + size_comps[c] = nir_umax_imm(b, nir_ushr(b, size_comps[c], lod), 1); + + if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D && tex->is_array) + size_comps[1] = size_comps[2]; + + return nir_vec(b, size_comps, tex->def.num_components); + } + + default: + break; + } + + UNREACHABLE(""); +} + +static inline enum pco_dim to_pco_dim(enum glsl_sampler_dim dim) +{ + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + return PCO_DIM_1D; + + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_SUBPASS: + case GLSL_SAMPLER_DIM_SUBPASS_MS: + return PCO_DIM_2D; + + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + return PCO_DIM_3D; + + /* case GLSL_SAMPLER_DIM_RECT: */ + /* case GLSL_SAMPLER_DIM_EXTERNAL: */ + default: + break; + } + + UNREACHABLE(""); +} + +static nir_def * +lower_tex_query_lod(nir_builder *b, nir_def *coords, nir_def *smp_coeffs) +{ + nir_def *lod_dval_post_clamp = + nir_channel(b, smp_coeffs, ROGUE_SMP_COEFF_LOD_DVAL_POST_CLAMP); + nir_def *lod_dval_pre_clamp = + nir_channel(b, smp_coeffs, ROGUE_SMP_COEFF_LOD_DVAL_PRE_CLAMP); + nir_def *tfrac_post_clamp = + nir_channel(b, smp_coeffs, ROGUE_SMP_COEFF_TFRAC_POST_CLAMP); + nir_def *tfrac_pre_clamp = + nir_channel(b, smp_coeffs, ROGUE_SMP_COEFF_TFRAC_PRE_CLAMP); + + /* Unpack. */ + lod_dval_post_clamp = nir_fmul_imm(b, lod_dval_post_clamp, 255.0f); + lod_dval_pre_clamp = nir_fmul_imm(b, lod_dval_pre_clamp, 255.0f); + + tfrac_post_clamp = nir_fmul_imm(b, tfrac_post_clamp, 255.0f); + tfrac_pre_clamp = nir_fmul_imm(b, tfrac_pre_clamp, 255.0f); + + /* Scale. */ + tfrac_post_clamp = nir_fdiv_imm(b, tfrac_post_clamp, 256.0f); + tfrac_pre_clamp = nir_fdiv_imm(b, tfrac_pre_clamp, 256.0f); + + /* Calculate coord deltas. */ + nir_def *coord_deltas = nir_imm_int(b, 0); + for (unsigned c = 0; c < coords->num_components; ++c) { + nir_def *coord = nir_channel(b, coords, c); + coord_deltas = nir_fadd(b, + coord_deltas, + nir_fadd(b, + nir_fabs(b, nir_ddx(b, coord)), + nir_fabs(b, nir_ddy(b, coord)))); + } + + nir_def *lod_comps[2] = { + [0] = nir_fadd(b, lod_dval_post_clamp, tfrac_post_clamp), + [1] = nir_fadd( + b, + nir_fadd_imm(b, tfrac_pre_clamp, -128.0f), + nir_fcsel(b, coord_deltas, lod_dval_pre_clamp, nir_imm_float(b, 0.0f))), + }; + + return nir_vec(b, lod_comps, ARRAY_SIZE(lod_comps)); +} + +static inline unsigned process_coords(nir_builder *b, + bool is_array, + bool is_query_lod, + bool coords_are_float, + nir_def *coords, + nir_def **float_coords, + nir_def **int_coords, + nir_def **float_array_index, + nir_def **int_array_index) +{ + unsigned num_comps = coords->num_components; + + *float_coords = coords_are_float ? coords : nir_i2f32(b, coords); + *int_coords = !coords_are_float ? coords : nir_f2i32(b, coords); + *float_array_index = NULL; + *int_array_index = NULL; + + if (!is_array || is_query_lod) + return num_comps; + + *float_array_index = nir_channel(b, *float_coords, num_comps - 1); + *int_array_index = nir_channel(b, *int_coords, num_comps - 1); + + *float_coords = nir_trim_vector(b, *float_coords, num_comps - 1); + *int_coords = nir_trim_vector(b, *int_coords, num_comps - 1); + + return num_comps - 1; +} + +static inline bool tex_src_is_float(nir_tex_instr *tex, + nir_tex_src_type src_type) +{ + int src_idx = nir_tex_instr_src_index(tex, src_type); + assert(src_idx >= 0); + return nir_tex_instr_src_type(tex, src_idx) == nir_type_float; +} + +/* 40-bit address, shifted right by two: */ +static inline void unpack_base_addr(nir_builder *b, + nir_def *tex_state_word[static 4], + nir_def **base_addr_lo, + nir_def **base_addr_hi) +{ + *base_addr_lo = nir_imm_int(b, 0); + + /* addr_lo[17..2] */ + nir_def *lo_17_2 = STATE_UNPACK(b, tex_state_word, 2, 16, 16); + *base_addr_lo = nir_bitfield_insert_imm(b, *base_addr_lo, lo_17_2, 2, 16); + + /* addr_lo[31..18] */ + nir_def *lo_31_18 = STATE_UNPACK(b, tex_state_word, 3, 0, 14); + *base_addr_lo = nir_bitfield_insert_imm(b, *base_addr_lo, lo_31_18, 18, 14); + + /* addr_hi[7..0] */ + *base_addr_hi = STATE_UNPACK(b, tex_state_word, 3, 14, 8); +} + +/** + * \brief Lowers a texture instruction. + * + * \param[in] b NIR builder. + * \param[in] instr NIR instruction. + * \param[in] cb_data User callback data. + * \return The replacement/lowered def. + */ +static nir_def *lower_tex(nir_builder *b, nir_instr *instr, void *cb_data) +{ + nir_tex_instr *tex = nir_instr_as_tex(instr); + pco_common_data *common = cb_data; + + unsigned tex_desc_set; + unsigned tex_binding; + pco_unpack_desc(tex->texture_index, &tex_desc_set, &tex_binding); + + unsigned smp_desc_set; + unsigned smp_binding; + pco_unpack_desc(tex->sampler_index, &smp_desc_set, &smp_binding); + + bool hw_array_support = false; + bool hw_int_support = false; + + b->cursor = nir_before_instr(instr); + + if (nir_tex_instr_is_query(tex) && tex->op != nir_texop_lod) + return lower_tex_query_basic(b, tex, tex_desc_set, tex_binding, common); + + nir_def *tex_state = nir_load_tex_state_pco(b, + ROGUE_NUM_TEXSTATE_DWORDS, + .desc_set = tex_desc_set, + .binding = tex_binding); + + nir_def *smp_state = nir_load_smp_state_pco(b, + ROGUE_NUM_TEXSTATE_DWORDS, + .desc_set = smp_desc_set, + .binding = smp_binding); + + /* Process tex sources, build up the smp flags and data words. */ + BITSET_DECLARE(tex_src_set, nir_num_tex_src_types) = { 0 }; + nir_def *tex_srcs[nir_num_tex_src_types]; + nir_def *smp_data_comps[NIR_MAX_VEC_COMPONENTS]; + unsigned smp_data_comp_count = 0; + pco_smp_flags smp_flags = { + .dim = to_pco_dim(tex->sampler_dim), + .lod_mode = PCO_LOD_MODE_NORMAL, + }; + + for (unsigned s = 0; s < nir_num_tex_src_types; ++s) + if ((tex_srcs[s] = get_src_def(tex, s)) != NULL) + BITSET_SET(tex_src_set, s); + + nir_def *float_coords; + nir_def *int_coords; + nir_def *float_array_index; + nir_def *int_array_index; + unsigned num_coord_comps = + process_coords(b, + tex->is_array, + tex->op == nir_texop_lod, + tex_src_is_float(tex, nir_tex_src_coord), + tex_srcs[nir_tex_src_coord], + &float_coords, + &int_coords, + &float_array_index, + &int_array_index); + + bool use_int_coords = !tex_src_is_float(tex, nir_tex_src_coord) && + hw_int_support; + + assert(BITSET_TEST(tex_src_set, nir_tex_src_coord)); + if (BITSET_TEST(tex_src_set, nir_tex_src_coord)) { + for (unsigned c = 0; c < num_coord_comps; ++c) { + smp_data_comps[smp_data_comp_count++] = + nir_channel(b, use_int_coords ? int_coords : float_coords, c); + } + + BITSET_CLEAR(tex_src_set, nir_tex_src_coord); + } + + nir_def *proj = NULL; + if (BITSET_TEST(tex_src_set, nir_tex_src_projector)) { + assert(tex_src_is_float(tex, nir_tex_src_projector)); + proj = tex_srcs[nir_tex_src_projector]; + smp_data_comps[smp_data_comp_count++] = + use_int_coords ? nir_f2i32(b, proj) : proj; + + smp_flags.proj = true; + BITSET_CLEAR(tex_src_set, nir_tex_src_projector); + } + + if (hw_array_support && int_array_index) { + smp_data_comps[smp_data_comp_count++] = + use_int_coords ? int_array_index : float_array_index; + + smp_flags.array = true; + } + + assert((BITSET_TEST(tex_src_set, nir_tex_src_bias) + + BITSET_TEST(tex_src_set, nir_tex_src_lod) + + BITSET_TEST(tex_src_set, nir_tex_src_ddx)) < 2); + + bool lod_set = false; + if (BITSET_TEST(tex_src_set, nir_tex_src_bias)) { + nir_def *lod = tex_srcs[nir_tex_src_bias]; + + if (!tex_src_is_float(tex, nir_tex_src_bias)) + lod = nir_i2f32(b, lod); + + smp_data_comps[smp_data_comp_count++] = lod; + + smp_flags.pplod = true; + smp_flags.lod_mode = PCO_LOD_MODE_BIAS; + + lod_set = true; + BITSET_CLEAR(tex_src_set, nir_tex_src_bias); + } else if (BITSET_TEST(tex_src_set, nir_tex_src_lod)) { + nir_def *lod = tex_srcs[nir_tex_src_lod]; + + if (!tex_src_is_float(tex, nir_tex_src_lod)) + lod = nir_i2f32(b, lod); + + smp_data_comps[smp_data_comp_count++] = lod; + + smp_flags.pplod = true; + smp_flags.lod_mode = PCO_LOD_MODE_REPLACE; + + lod_set = true; + BITSET_CLEAR(tex_src_set, nir_tex_src_lod); + } else if (BITSET_TEST(tex_src_set, nir_tex_src_ddx)) { + assert(BITSET_TEST(tex_src_set, nir_tex_src_ddy)); + assert(tex_src_is_float(tex, nir_tex_src_ddx) && + tex_src_is_float(tex, nir_tex_src_ddy)); + + nir_def *ddx = tex_srcs[nir_tex_src_ddx]; + nir_def *ddy = tex_srcs[nir_tex_src_ddy]; + + for (unsigned c = 0; c < ddx->num_components; ++c) { + smp_data_comps[smp_data_comp_count++] = nir_channel(b, ddx, c); + smp_data_comps[smp_data_comp_count++] = nir_channel(b, ddy, c); + } + + smp_flags.lod_mode = PCO_LOD_MODE_GRADIENTS; + + lod_set = true; + BITSET_CLEAR(tex_src_set, nir_tex_src_ddx); + BITSET_CLEAR(tex_src_set, nir_tex_src_ddy); + } + + if (!hw_array_support && int_array_index) { + /* Set a per-pixel lod bias of 0 if none has been set yet. */ + if (!lod_set) { + smp_data_comps[smp_data_comp_count++] = nir_imm_int(b, 0); + smp_flags.pplod = true; + smp_flags.lod_mode = PCO_LOD_MODE_BIAS; + lod_set = true; + } + + nir_def *tex_state_word[] = { + [0] = nir_channel(b, tex_state, 0), + [1] = nir_channel(b, tex_state, 1), + [2] = nir_channel(b, tex_state, 2), + [3] = nir_channel(b, tex_state, 3), + }; + + nir_def *base_addr_lo; + nir_def *base_addr_hi; + unpack_base_addr(b, tex_state_word, &base_addr_lo, &base_addr_hi); + + nir_def *array_index = int_array_index; + assert(array_index); + + nir_def *array_max = STATE_UNPACK(b, tex_state_word, 2, 4, 11); + array_index = nir_uclamp(b, array_index, nir_imm_int(b, 0), array_max); + + nir_def *tex_meta = nir_load_tex_meta_pco(b, + PCO_IMAGE_META_COUNT, + .desc_set = tex_desc_set, + .binding = tex_binding); + + nir_def *array_stride = + nir_channel(b, tex_meta, PCO_IMAGE_META_LAYER_SIZE); + + nir_def *array_offset = nir_imul(b, array_index, array_stride); + + nir_def *addr = + nir_uadd64_32(b, base_addr_lo, base_addr_hi, array_offset); + + smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 0); + smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 1); + + smp_flags.tao = true; + } + + if (BITSET_TEST(tex_src_set, nir_tex_src_offset) || + BITSET_TEST(tex_src_set, nir_tex_src_ms_index)) { + nir_def *lookup = nir_imm_int(b, 0); + + if (BITSET_TEST(tex_src_set, nir_tex_src_offset)) { + nir_def *offset = tex_srcs[nir_tex_src_offset]; + const unsigned packed_offset_start[] = { 0, 6, 12 }; + const unsigned packed_offset_size[] = { 6, 6, 4 }; + + for (unsigned c = 0; c < offset->num_components; ++c) { + lookup = nir_bitfield_insert(b, + lookup, + nir_channel(b, offset, c), + nir_imm_int(b, packed_offset_start[c]), + nir_imm_int(b, packed_offset_size[c])); + } + + smp_flags.soo = true; + BITSET_CLEAR(tex_src_set, nir_tex_src_offset); + } + + if (BITSET_TEST(tex_src_set, nir_tex_src_ms_index)) { + lookup = nir_bitfield_insert(b, + lookup, + tex_srcs[nir_tex_src_ms_index], + nir_imm_int(b, 16), + nir_imm_int(b, 3)); + + smp_flags.sno = true; + BITSET_CLEAR(tex_src_set, nir_tex_src_ms_index); + } + + smp_data_comps[smp_data_comp_count++] = lookup; + } + + /* Shadow comparator. */ + nir_def *comparator = NULL; + if (BITSET_TEST(tex_src_set, nir_tex_src_comparator)) { + comparator = tex_srcs[nir_tex_src_comparator]; + + if (proj) + comparator = nir_fdiv(b, comparator, proj); + + BITSET_CLEAR(tex_src_set, nir_tex_src_comparator); + } + + assert(BITSET_IS_EMPTY(tex_src_set)); + + /* Pad out the rest of the data words. */ + assert(smp_data_comp_count <= NIR_MAX_VEC_COMPONENTS); + for (unsigned c = smp_data_comp_count; c < ARRAY_SIZE(smp_data_comps); ++c) + smp_data_comps[c] = nir_imm_int(b, 0); + + nir_def *smp_data = nir_vec(b, smp_data_comps, ARRAY_SIZE(smp_data_comps)); + + nir_def *result; + switch (tex->op) { + case nir_texop_lod: + result = nir_smp_coeffs_pco(b, + smp_data, + tex_state, + smp_state, + .smp_flags_pco = smp_flags._, + .range = smp_data_comp_count); + + result = lower_tex_query_lod(b, float_coords, result); + break; + + case nir_texop_txf: + case nir_texop_txf_ms: + smp_flags.nncoords = true; + FALLTHROUGH; + + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txd: + case nir_texop_txl: + smp_flags.integer = use_int_coords; + smp_flags.fcnorm = nir_alu_type_get_base_type(tex->dest_type) == + nir_type_float; + + result = nir_smp_pco(b, + tex->def.num_components, + smp_data, + tex_state, + smp_state, + .smp_flags_pco = smp_flags._, + .range = smp_data_comp_count); + break; + + default: + UNREACHABLE(""); + } + + if (tex->is_shadow) { + assert(result->num_components == 1); + + nir_def *compare_op = + nir_load_smp_meta_pco(b, + 1, + .desc_set = smp_desc_set, + .binding = smp_binding, + .component = PCO_SAMPLER_META_COMPARE_OP); + + result = nir_alphatst_pco(b, result, comparator, compare_op); + } + + return result; +} + +/** + * \brief Filters texture instructions. + * + * \param[in] instr NIR instruction. + * \param[in] cb_data User callback data. + * \return True if the instruction matches the filter. + */ +static bool is_tex(const nir_instr *instr, UNUSED const void *cb_data) +{ + return instr->type == nir_instr_type_tex; +} + +/** + * \brief Texture lowering pass. + * + * \param[in,out] shader NIR shader. + * \param[in] common Shader common data. + * \return True if the pass made progress. + */ +bool pco_nir_lower_tex(nir_shader *shader, pco_common_data *common) +{ + return nir_shader_lower_instructions(shader, is_tex, lower_tex, common); +} diff --git a/src/imagination/pco/pco_nir_vk.c b/src/imagination/pco/pco_nir_vk.c index 3f41dd52f63..876a103473f 100644 --- a/src/imagination/pco/pco_nir_vk.c +++ b/src/imagination/pco/pco_nir_vk.c @@ -62,6 +62,45 @@ static nir_def *lower_load_vulkan_descriptor(nir_builder *b, return nir_imm_ivec3(b, desc_set_binding, elem, 0); } +static void lower_tex_deref_to_binding(nir_tex_instr *tex, + unsigned deref_index, + pco_common_data *common) +{ + nir_tex_src *deref_src = &tex->src[deref_index]; + nir_deref_instr *deref = + nir_instr_as_deref(deref_src->src.ssa->parent_instr); + + assert(deref->deref_type == nir_deref_type_var); + + /* TODO: array support */ + + unsigned desc_set = deref->var->data.descriptor_set; + unsigned binding = deref->var->data.binding; + + set_resource_used(common, desc_set, binding); + + uint32_t desc_set_binding = pco_pack_desc(desc_set, binding); + if (deref_src->src_type == nir_tex_src_texture_deref) + tex->texture_index = desc_set_binding; + else + tex->sampler_index = desc_set_binding; + + nir_tex_instr_remove_src(tex, deref_index); +} + +static inline void lower_tex_derefs(nir_tex_instr *tex, pco_common_data *common) +{ + int deref_index; + + deref_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + if (deref_index >= 0) + lower_tex_deref_to_binding(tex, deref_index, common); + + deref_index = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref); + if (deref_index >= 0) + lower_tex_deref_to_binding(tex, deref_index, common); +} + /** * \brief Lowers a Vulkan-related instruction. * @@ -73,11 +112,26 @@ static nir_def *lower_load_vulkan_descriptor(nir_builder *b, static nir_def *lower_vk(nir_builder *b, nir_instr *instr, void *cb_data) { pco_common_data *common = cb_data; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - switch (intr->intrinsic) { - case nir_intrinsic_load_vulkan_descriptor: - return lower_load_vulkan_descriptor(b, intr, common); + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_load_vulkan_descriptor: + return lower_load_vulkan_descriptor(b, intr, common); + + default: + break; + } + + break; + } + + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + lower_tex_derefs(tex, common); + return NIR_LOWER_INSTR_PROGRESS; + } default: break; @@ -95,13 +149,29 @@ static nir_def *lower_vk(nir_builder *b, nir_instr *instr, void *cb_data) */ static bool is_vk(const nir_instr *instr, UNUSED const void *cb_data) { - if (instr->type != nir_instr_type_intrinsic) - return false; + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_load_vulkan_descriptor: + return true; - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - switch (intr->intrinsic) { - case nir_intrinsic_load_vulkan_descriptor: - return true; + default: + break; + } + + break; + } + + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (nir_tex_instr_src_index(tex, nir_tex_src_texture_deref) >= 0 || + nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) >= 0) { + return true; + } + + FALLTHROUGH; + } default: break; diff --git a/src/imagination/pco/pco_ops.py b/src/imagination/pco/pco_ops.py index 74d760a1baa..97f69661424 100644 --- a/src/imagination/pco/pco_ops.py +++ b/src/imagination/pco/pco_ops.py @@ -171,7 +171,11 @@ OM_SIGNPOS = op_mod_enum('signpos', [ 'mtb', 'ftb', ]) -OM_DIM = op_mod('dim', BaseType.uint) +OM_DIM = op_mod_enum('dim', [ + '1d', + '2d', + '3d', +]) OM_PROJ = op_mod('proj', BaseType.bool) OM_FCNORM = op_mod('fcnorm', BaseType.bool) OM_NNCOORDS = op_mod('nncoords', BaseType.bool) @@ -179,22 +183,24 @@ OM_LOD_MODE = op_mod_enum('lod_mode', [ ('normal', ''), ('bias', 'bias'), ('replace', 'replace'), - ('gradient', 'gradient'), + ('gradients', 'gradients'), ]) OM_PPLOD = op_mod('pplod', BaseType.bool) OM_TAO = op_mod('tao', BaseType.bool) OM_SOO = op_mod('soo', BaseType.bool) OM_SNO = op_mod('sno', BaseType.bool) -OM_WRT = op_mod('wrt', BaseType.bool) OM_SB_MODE = op_mod_enum('sb_mode', [ ('none', ''), - ('data', 'data'), - ('info', 'info'), + ('rawdata', 'rawdata'), + ('coeffs', 'coeffs'), ('both', 'both'), ]) OM_ARRAY = op_mod('array', BaseType.bool) OM_INTEGER = op_mod('integer', BaseType.bool) -OM_SCHEDSWAP = op_mod('schedswap', BaseType.bool) +OM_SCHEDSWAP = op_mod_enum('schedswap', [ + ('none', ''), + ('swap', 'schedswap'), +]) OM_F16 = op_mod('f16', BaseType.bool) OM_TILED = op_mod('tiled', BaseType.bool) OM_FREEP = op_mod('freep', BaseType.bool) @@ -268,9 +274,9 @@ OM_ITR_MODE = op_mod_enum('itr_mode', [ 'centroid', ]) OM_SCHED = op_mod_enum('sched', [ - 'none', - 'swap', - 'wdf', + ('none', ''), + ('swap', 'schedswap'), + ('wdf', 'schedwdf'), ]) OM_ATOM = op_mod('atom', BaseType.bool, unset=True) OM_OLCHK = op_mod('olchk', BaseType.bool, unset=True) @@ -363,6 +369,13 @@ O_LD = hw_op('ld', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_LD], 1, 3) O_ST = hw_direct_op('st', [OM_MCU_CACHE_MODE_ST], 0, 6) O_ATOMIC = hw_op('atomic', [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM_OP], 1, 2) +O_SMP = hw_op('smp', OM_ALU_RPT1 + [OM_DIM, OM_PROJ, OM_FCNORM, OM_NNCOORDS, + OM_LOD_MODE, OM_PPLOD, OM_TAO, OM_SOO, + OM_SNO, OM_SB_MODE, OM_MCU_CACHE_MODE_LD, + OM_ARRAY, OM_INTEGER, OM_SCHEDSWAP, OM_F16], 1, 6) + +O_ALPHATST = hw_op('alphatst', OM_ALU_RPT1, 1, 4) + ## Bitwise. O_MOVI32 = hw_op('movi32', OM_ALU, 1, 1) @@ -406,6 +419,7 @@ O_SCMP = hw_op('scmp', OM_ALU + [OM_TST_OP_MAIN], 1, 2, [], [[RM_ABS, RM_NEG], [ O_BCMP = hw_op('bcmp', OM_ALU + [OM_TST_OP_MAIN, OM_TST_TYPE_MAIN], 1, 2, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) O_BCSEL = hw_op('bcsel', OM_ALU, 1, 3, [], [[], [RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) O_CSEL = hw_op('csel', OM_ALU + [OM_TST_OP_MAIN, OM_TST_TYPE_MAIN], 1, 3, [], [[], [RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) +O_PSEL = hw_op('psel', OM_ALU, 1, 3) O_PSEL_TRIG = hw_op('psel_trig', OM_ALU, 1, 3) O_FSIGN = hw_op('fsign', OM_ALU, 1, 1) O_ISIGN = hw_op('isign', OM_ALU, 1, 1) diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index bc9fd91d11d..129db439fa5 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -423,13 +423,11 @@ trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src) } static unsigned fetch_resource_base_reg(const pco_common_data *common, - uint32_t packed_desc, - unsigned elem) + unsigned desc_set, + unsigned binding, + unsigned elem, + bool *is_img_smp) { - unsigned desc_set; - unsigned binding; - pco_unpack_desc(packed_desc, &desc_set, &binding); - assert(desc_set < ARRAY_SIZE(common->desc_sets)); const pco_descriptor_set_data *desc_set_data = &common->desc_sets[desc_set]; assert(desc_set_data->used); @@ -438,14 +436,27 @@ static unsigned fetch_resource_base_reg(const pco_common_data *common, const pco_binding_data *binding_data = &desc_set_data->bindings[binding]; assert(binding_data->used); + if (is_img_smp) + *is_img_smp = binding_data->is_img_smp; + unsigned reg_offset = elem * binding_data->range.stride; assert(reg_offset < binding_data->range.count); unsigned reg_index = binding_data->range.start + reg_offset; - return reg_index; } +static unsigned fetch_resource_base_reg_packed(const pco_common_data *common, + uint32_t packed_desc, + unsigned elem, + bool *is_img_smp) +{ + unsigned desc_set; + unsigned binding; + pco_unpack_desc(packed_desc, &desc_set, &binding); + + return fetch_resource_base_reg(common, desc_set, binding, elem, is_img_smp); +} static pco_instr *trans_load_buffer(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest, @@ -459,7 +470,8 @@ static pco_instr *trans_load_buffer(trans_ctx *tctx, uint32_t packed_desc = nir_src_comp_as_uint(intr->src[0], 0); unsigned elem = nir_src_comp_as_uint(intr->src[0], 1); - unsigned sh_index = fetch_resource_base_reg(common, packed_desc, elem); + unsigned sh_index = + fetch_resource_base_reg_packed(common, packed_desc, elem, NULL); pco_ref base_addr[2]; pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr); @@ -498,7 +510,8 @@ static pco_instr *trans_store_buffer(trans_ctx *tctx, uint32_t packed_desc = nir_src_comp_as_uint(intr->src[1], 0); unsigned elem = nir_src_comp_as_uint(intr->src[1], 1); - unsigned sh_index = fetch_resource_base_reg(common, packed_desc, elem); + unsigned sh_index = + fetch_resource_base_reg_packed(common, packed_desc, elem, NULL); pco_ref base_addr[2]; pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr); @@ -593,7 +606,8 @@ static pco_instr *trans_atomic_buffer(trans_ctx *tctx, uint32_t packed_desc = nir_src_comp_as_uint(intr->src[0], 0); unsigned elem = nir_src_comp_as_uint(intr->src[0], 1); - unsigned sh_index = fetch_resource_base_reg(common, packed_desc, elem); + unsigned sh_index = + fetch_resource_base_reg_packed(common, packed_desc, elem, NULL); pco_ref base_addr[2]; pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr); @@ -681,6 +695,162 @@ trans_load_sysval(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest) return pco_mov(&tctx->b, dest, src, .rpt = chans); } +static bool desc_set_binding_is_comb_img_smp(unsigned desc_set, + unsigned binding, + const pco_common_data *common) +{ + const pco_descriptor_set_data *desc_set_data = &common->desc_sets[desc_set]; + assert(desc_set_data->used); + assert(desc_set_data->bindings && binding < desc_set_data->binding_count); + + const pco_binding_data *binding_data = &desc_set_data->bindings[binding]; + assert(binding_data->used); + + return binding_data->is_img_smp; +} + +static pco_instr *lower_load_tex_smp_state(trans_ctx *tctx, + nir_intrinsic_instr *intr, + pco_ref dest, + bool smp) +{ + unsigned desc_set = nir_intrinsic_desc_set(intr); + unsigned binding = nir_intrinsic_binding(intr); + unsigned start_comp = nir_intrinsic_component(intr); + unsigned chans = pco_ref_get_chans(dest); + assert(start_comp + chans <= ROGUE_NUM_TEXSTATE_DWORDS); + + /* TODO: array support. */ + const pco_common_data *common = &tctx->shader->data.common; + bool is_img_smp; + unsigned sh_index = + fetch_resource_base_reg(common, desc_set, binding, 0, &is_img_smp); + pco_ref state_words = + pco_ref_hwreg_vec(sh_index, PCO_REG_CLASS_SHARED, chans); + + /* Sampler state comes after image state and metadata in combined image + * samplers. + */ + if (smp && is_img_smp) { + state_words = pco_ref_offset(state_words, ROGUE_NUM_TEXSTATE_DWORDS); + state_words = pco_ref_offset(state_words, PCO_IMAGE_META_COUNT); + } + + state_words = pco_ref_offset(state_words, start_comp); + + return pco_mov(&tctx->b, dest, state_words, .rpt = chans); +} + +static pco_instr *lower_load_tex_smp_meta(trans_ctx *tctx, + nir_intrinsic_instr *intr, + pco_ref dest, + bool smp) +{ + unsigned desc_set = nir_intrinsic_desc_set(intr); + unsigned binding = nir_intrinsic_binding(intr); + unsigned start_comp = nir_intrinsic_component(intr); + unsigned chans = pco_ref_get_chans(dest); + + /* TODO: array support. */ + const pco_common_data *common = &tctx->shader->data.common; + bool is_img_smp; + unsigned sh_index = + fetch_resource_base_reg(common, desc_set, binding, 0, &is_img_smp); + pco_ref state_words = + pco_ref_hwreg_vec(sh_index, PCO_REG_CLASS_SHARED, chans); + + assert(start_comp + chans <= + (smp ? PCO_SAMPLER_META_COUNT : PCO_IMAGE_META_COUNT)); + + state_words = pco_ref_offset(state_words, ROGUE_NUM_TEXSTATE_DWORDS); + + if (smp && is_img_smp) { + state_words = pco_ref_offset(state_words, PCO_IMAGE_META_COUNT); + state_words = pco_ref_offset(state_words, ROGUE_NUM_TEXSTATE_DWORDS); + } + + state_words = pco_ref_offset(state_words, start_comp); + + return pco_mov(&tctx->b, dest, state_words, .rpt = chans); +} + +static pco_instr *lower_smp(trans_ctx *tctx, + nir_intrinsic_instr *intr, + pco_ref *dest, + pco_ref data, + pco_ref tex_state, + pco_ref smp_state) +{ + pco_smp_flags smp_flags = { ._ = nir_intrinsic_smp_flags_pco(intr) }; + unsigned data_comps = nir_intrinsic_range(intr); + + data = pco_ref_chans(data, data_comps); + + unsigned chans = pco_ref_get_chans(*dest); + enum pco_sb_mode sb_mode = PCO_SB_MODE_NONE; + switch (intr->intrinsic) { + case nir_intrinsic_smp_coeffs_pco: + /* Shrink the destination to its actual size. */ + *dest = pco_ref_chans(*dest, ROGUE_SMP_COEFF_COUNT); + chans = 1; /* Chans must be 1 for coeff mode. */ + + sb_mode = PCO_SB_MODE_COEFFS; + break; + + case nir_intrinsic_smp_pco: + /* Destination and chans should be correct. */ + break; + + default: + UNREACHABLE(""); + } + + pco_ref shared_lod = pco_ref_null(); + + pco_instr *smp = pco_smp(&tctx->b, + *dest, + pco_ref_drc(PCO_DRC_0), + tex_state, + data, + smp_state, + shared_lod, + pco_ref_imm8(chans), + .dim = smp_flags.dim, + .proj = smp_flags.proj, + .fcnorm = smp_flags.fcnorm, + .nncoords = smp_flags.nncoords, + .lod_mode = smp_flags.lod_mode, + .pplod = smp_flags.pplod, + .tao = smp_flags.tao, + .soo = smp_flags.soo, + .sno = smp_flags.sno, + .sb_mode = sb_mode, + .array = smp_flags.array, + .integer = smp_flags.integer); + + return smp; +} + +static pco_instr *lower_alphatst(trans_ctx *tctx, + pco_ref dest, + pco_ref src0, + pco_ref src1, + pco_ref src2) +{ + pco_alphatst(&tctx->b, + pco_ref_pred(PCO_PRED_P0), + pco_ref_drc(PCO_DRC_0), + src0, + src1, + src2); + + return pco_psel(&tctx->b, + dest, + pco_ref_pred(PCO_PRED_P0), + pco_fone, + pco_zero); +} + /** * \brief Translates a NIR intrinsic instruction into PCO. * @@ -762,6 +932,32 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr) : pco_fdsy(&tctx->b, dest, src[0]); break; + /* Texture-related intrinsics. */ + case nir_intrinsic_load_tex_state_pco: + instr = lower_load_tex_smp_state(tctx, intr, dest, false); + break; + + case nir_intrinsic_load_smp_state_pco: + instr = lower_load_tex_smp_state(tctx, intr, dest, true); + break; + + case nir_intrinsic_load_tex_meta_pco: + instr = lower_load_tex_smp_meta(tctx, intr, dest, false); + break; + + case nir_intrinsic_load_smp_meta_pco: + instr = lower_load_tex_smp_meta(tctx, intr, dest, true); + break; + + case nir_intrinsic_smp_coeffs_pco: + case nir_intrinsic_smp_pco: + instr = lower_smp(tctx, intr, &dest, src[0], src[1], src[2]); + break; + + case nir_intrinsic_alphatst_pco: + instr = lower_alphatst(tctx, dest, src[0], src[1], src[2]); + break; + default: printf("Unsupported intrinsic: \""); nir_print_instr(&intr->instr, stdout);