pco: initial texture/sampler compiler support

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2024-12-09 12:42:35 +00:00 committed by Marge Bot
parent 6d96c9334a
commit 7df32ba09d
12 changed files with 1130 additions and 31 deletions

View file

@ -2681,3 +2681,26 @@ intrinsic("enqueue_node_payloads", src_comp=[-1])
# Returns true if it has been called for every payload.
intrinsic("finalize_incoming_node_payload", src_comp=[-1], dest_comp=1)
# Loads the texture/sampler state words for a given descriptor.
intrinsic("load_tex_state_pco", dest_comp=0, indices=[DESC_SET, BINDING, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
intrinsic("load_smp_state_pco", dest_comp=0, indices=[DESC_SET, BINDING, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
# Loads the texture/sampler metadata for a given descriptor.
intrinsic("load_tex_meta_pco", dest_comp=0, indices=[DESC_SET, BINDING, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
intrinsic("load_smp_meta_pco", dest_comp=0, indices=[DESC_SET, BINDING, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])
index("uint16_t", "smp_flags_pco")
# smp_pco(data, tex_state, smp_state)
# Performs a standard sampling operation with the given data and state words.
# Outputs between 1-4 comps.
intrinsic("smp_pco", src_comp=[16, 4, 4], dest_comp=0, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32])
# smp_coeffs_pco(data, tex_state, smp_state)
# Returns the calculated sampling coefficients for the given data and state words.
intrinsic("smp_coeffs_pco", src_comp=[16, 4, 4], dest_comp=8, indices=[SMP_FLAGS_PCO, RANGE], bit_sizes=[32])
# alphatst_pco(data, comparator, comparison op)
# Performs an alpha test on the given parameters, returning float 0/1 depending on the comparison result.
intrinsic("alphatst_pco", src_comp=[1, 1, 1], dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER], bit_sizes=[32])

View file

@ -105,6 +105,8 @@
/* Number of TEXSTATE_SAMPLER state words that need setting up. */
#define ROGUE_NUM_TEXSTATE_SAMPLER_WORDS 2U
#define ROGUE_NUM_TEXSTATE_DWORDS 4U
/* 12 dwords reserved for shared register management. The first dword is the
* number of shared register blocks to reload. Should be a multiple of 4 dwords,
* size in bytes.
@ -173,4 +175,17 @@ enum {
ROGUE_USC_COEFFICIENT_SET_SIZE,
};
enum {
ROGUE_SMP_COEFF_UFRAC = 0,
ROGUE_SMP_COEFF_VFRAC,
ROGUE_SMP_COEFF_SFRAC,
ROGUE_SMP_COEFF_LOD_DVAL_POST_CLAMP,
ROGUE_SMP_COEFF_LOD_DVAL_PRE_CLAMP,
ROGUE_SMP_COEFF_TFRAC_POST_CLAMP,
ROGUE_SMP_COEFF_TFRAC_PRE_CLAMP,
ROGUE_SMP_COEFF_COUNT,
};
#define ROGUE_SMP_MAX_DATA_WORDS 18U
#endif /* ROGUE_HW_DEFS_H */

View file

@ -18,6 +18,7 @@ libpowervr_compiler_files = files(
'pco_nir.c',
'pco_nir_compute.c',
'pco_nir_pvfio.c',
'pco_nir_tex.c',
'pco_nir_vk.c',
'pco_opt.c',
'pco_print.c',

View file

@ -74,6 +74,26 @@ typedef struct _pco_cs_data {
unsigned workgroup_size[3]; /** Workgroup size. */
} pco_cs_data;
/** PCO image descriptor metadata. */
enum pco_image_meta {
PCO_IMAGE_META_LAYER_SIZE,
PCO_IMAGE_META_RSVD0,
PCO_IMAGE_META_RSVD1,
PCO_IMAGE_META_RSVD2,
PCO_IMAGE_META_COUNT,
};
/** PCO sampler descriptor metadata. */
enum pco_sampler_meta {
PCO_SAMPLER_META_COMPARE_OP,
PCO_SAMPLER_META_RSVD0,
PCO_SAMPLER_META_RSVD1,
PCO_SAMPLER_META_RSVD2,
PCO_SAMPLER_META_COUNT,
};
/** PCO descriptor binding data. */
typedef struct _pco_binding_data {
pco_range range; /** Descriptor location range. */

View file

@ -1522,6 +1522,28 @@ static inline bool pco_should_print_binary(pco_shader *shader)
return true;
}
/* Interface with NIR. */
typedef union PACKED _pco_smp_flags {
struct PACKED {
unsigned dim : 2;
bool proj : 1;
bool fcnorm : 1;
bool nncoords : 1;
enum pco_lod_mode lod_mode : 2;
bool pplod : 1;
bool tao : 1;
bool soo : 1;
bool sno : 1;
bool array : 1;
bool integer : 1;
unsigned pad : 3;
};
uint16_t _;
} pco_smp_flags;
static_assert(sizeof(pco_smp_flags) == sizeof(uint16_t),
"sizeof(pco_smp_flags) != sizeof(uint16_t)");
/* PCO IR passes. */
bool pco_const_imms(pco_shader *shader);
bool pco_bool(pco_shader *shader);
@ -1534,6 +1556,7 @@ bool pco_legalize(pco_shader *shader);
bool pco_nir_compute_instance_check(nir_shader *shader);
bool pco_nir_lower_algebraic(nir_shader *shader);
bool pco_nir_lower_algebraic_late(nir_shader *shader);
bool pco_nir_lower_tex(nir_shader *shader, pco_common_data *common);
bool pco_nir_lower_vk(nir_shader *shader, pco_common_data *common);
bool pco_nir_pfo(nir_shader *shader, pco_fs_data *fs);
bool pco_nir_point_size(nir_shader *shader);

View file

@ -1820,8 +1820,8 @@ F_SBMODE = field_enum_type(
name='sbmode', num_bits=2,
elems=[
('none', 0b00),
('data', 0b01),
('info', 0b10),
('rawdata', 0b01),
('coeffs', 0b10),
('both', 0b11),
])

View file

@ -310,6 +310,31 @@ enum_map(OM_FRED_PART.t, F_RED_PART, [
('b', 'b'),
])
enum_map(OM_DIM.t, F_DMN, [
('1d', '1d'),
('2d', '2d'),
('3d', '3d'),
])
enum_map(OM_LOD_MODE.t, F_LODM, [
('normal', 'normal'),
('bias', 'bias'),
('replace', 'replace'),
('gradients', 'gradients'),
])
enum_map(OM_SB_MODE.t, F_SBMODE, [
('none', 'none'),
('rawdata', 'rawdata'),
('coeffs', 'coeffs'),
('both', 'both'),
])
enum_map(OM_SCHEDSWAP.t, F_SCHED_CTRL, [
('none', 'none'),
('swap', 'swap'),
])
class OpRef(object):
def __init__(self, ref_type, index, mods):
self.type = ref_type
@ -1320,6 +1345,75 @@ encode_map(O_ATOMIC,
op_ref_maps=[('backend', [['s0', 's1', 's2', 's3', 's4', 's5']], ['drc', ['s0', 's1', 's2', 's3', 's4', 's5']])]
)
encode_map(O_SMP,
encodings=[
(I_SMP_EXTAB, [
('fcnorm', OM_FCNORM),
('drc', ('pco_ref_get_drc', SRC(0))),
('dmn', OM_DIM),
('chan', ('pco_ref_get_imm', SRC(5))),
('lodm', OM_LOD_MODE),
('pplod', OM_PPLOD),
('proj', OM_PROJ),
('sbmode', OM_SB_MODE),
('nncoords', OM_NNCOORDS),
('sno', OM_SNO),
('soo', OM_SOO),
('tao', OM_TAO),
('f16', OM_F16),
('swap', OM_SCHEDSWAP),
('cachemode_ld', OM_MCU_CACHE_MODE_LD)
]),
(I_SMP_EXTA, [
('fcnorm', OM_FCNORM),
('drc', ('pco_ref_get_drc', SRC(0))),
('dmn', OM_DIM),
('chan', ('pco_ref_get_imm', SRC(5))),
('lodm', OM_LOD_MODE),
('pplod', OM_PPLOD),
('proj', OM_PROJ),
('sbmode', OM_SB_MODE),
('nncoords', OM_NNCOORDS),
('sno', OM_SNO),
('soo', OM_SOO),
('tao', OM_TAO)
], [
(OM_F16, '== false'),
(OM_SCHEDSWAP, '== PCO_SCHEDSWAP_NONE'),
(OM_MCU_CACHE_MODE_LD, '== PCO_CACHEMODE_LD_NORMAL')
]),
(I_SMP_BRIEF, [
('fcnorm', OM_FCNORM),
('drc', ('pco_ref_get_drc', SRC(0))),
('dmn', OM_DIM),
('chan', ('pco_ref_get_imm', SRC(5))),
('lodm', OM_LOD_MODE)
], [
(OM_F16, '== false'),
(OM_SCHEDSWAP, '== PCO_SCHEDSWAP_NONE'),
(OM_MCU_CACHE_MODE_LD, '== PCO_CACHEMODE_LD_NORMAL'),
(OM_PPLOD, '== false'),
(OM_PROJ, '== false'),
(OM_SB_MODE, '== PCO_SB_MODE_NONE'),
(OM_NNCOORDS, '== false'),
(OM_SNO, '== false'),
(OM_SOO, '== false'),
(OM_TAO, '== false')
])
],
op_ref_maps=[('backend', ['s4'], ['drc', 's0', 's1', 's2', ['s3', '_'], 'imm'])]
)
encode_map(O_ALPHATST,
encodings=[
(I_VISTEST_ATST, [
('pwen', ('!pco_ref_is_null', DEST(0))),
('ifb', True)
])
],
op_ref_maps=[('backend', [['p0', '_']], ['drc', 's0', 's1', 's2'])]
)
encode_map(O_BBYP0BM,
encodings=[
(I_PHASE0_SRC, [
@ -2088,6 +2182,38 @@ group_map(O_CSEL,
]
)
group_map(O_PSEL,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p0_p1_p2'),
('olchk', OM_OLCHK),
('w1p', False),
('w0p', True),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', OM_RPT)
]),
enc_ops=[
('0', O_IMADD64, ['ft0', 'fte'], ['pco_zero', 'pco_zero', 'pco_zero', 'is0', SRC(0)]),
('1', O_MBYP, ['ft1'], [SRC(1)]),
('2_tst', O_TST, ['ftt', '_'], ['is1', '_'], [(OM_TST_OP_MAIN, 'zero'), (OM_TST_TYPE_MAIN, 'u32'), (OM_PHASE2END, True)]),
('2_mov', O_MOVC, [DEST(0), '_'], ['ftt', SRC(2), 'is4', '_', '_'])
],
srcs=[
('s[0]', ('0', SRC(0)), 's0'),
('s[1]', ('0', SRC(1)), 's1'),
('s[2]', ('0', SRC(2)), 's2'),
('s[3]', ('1', SRC(0)), 's3'),
('s[4]', ('2_mov', SRC(1)), 'fte'),
],
iss=[
('is[0]', 's4'),
('is[1]', 'ft0'),
('is[4]', 'ft1'),
],
dests=[('w[0]', ('2_mov', DEST(0)), 'w0')]
)
group_map(O_PSEL_TRIG,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p0_p1_p2'),
@ -2535,6 +2661,46 @@ group_map(O_ATOMIC,
]
)
group_map(O_SMP,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'be'),
('olchk', OM_OLCHK),
('w1p', False),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', False),
('rpt', 1)
]),
enc_ops=[('backend', O_SMP)],
srcs=[
('s[0]', ('backend', SRC(1)), 's0'),
('s[1]', ('backend', SRC(2)), 's1'),
('s[2]', ('backend', SRC(3)), 's2'),
('s[3]', ('backend', SRC(4)), 's3'),
('s[4]', ('backend', DEST(0)), 's4')
]
)
group_map(O_ALPHATST,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'be'),
('olchk', OM_OLCHK),
('w1p', False),
('w0p', False),
('cc', OM_EXEC_CND),
('end', OM_END),
('atom', OM_ATOM),
('rpt', 1)
]),
enc_ops=[('backend', O_ALPHATST)],
srcs=[
('s[0]', ('backend', SRC(1)), 's0'),
('s[1]', ('backend', SRC(2)), 's1'),
('s[2]', ('backend', SRC(3)), 's2'),
]
)
group_map(O_MOVI32,
hdr=(I_IGRP_HDR_BITWISE, [
('opcnt', 'p0'),

View file

@ -265,6 +265,9 @@ void pco_lower_nir(pco_ctx *ctx, nir_shader *nir, pco_data *data)
nir_io_add_const_offset_to_base,
nir_var_shader_in | nir_var_shader_out);
NIR_PASS(_, nir, nir_lower_tex, &(nir_lower_tex_options){});
NIR_PASS(_, nir, pco_nir_lower_tex, &data->common);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, nir, pco_nir_pfo, &data->fs);
} else if (nir->info.stage == MESA_SHADER_VERTEX) {

View file

@ -0,0 +1,568 @@
/*
* Copyright © 2025 Imagination Technologies Ltd.
*
* SPDX-License-Identifier: MIT
*/
/**
* \file pco_nir_tex.c
*
* \brief PCO NIR texture/sampler lowering passes.
*/
#include "hwdef/rogue_hw_defs.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"
#include "pco.h"
#include "pco_builder.h"
#include "pco_internal.h"
#include "util/macros.h"
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
/* State word unpacking helpers. */
#define STATE_UNPACK(b, state_word, word, start_bit, num_bits) \
nir_ubitfield_extract_imm(b, state_word[word], start_bit, num_bits)
#define STATE_UNPACK_ADD(b, state_word, word, start_bit, num_bits, val) \
nir_iadd_imm(b, STATE_UNPACK(b, state_word, word, start_bit, num_bits), val)
#define STATE_UNPACK_SHIFT(b, state_word, word, start_bit, num_bits, val) \
nir_ishl(b, \
nir_imm_int(b, val), \
STATE_UNPACK(b, state_word, word, start_bit, num_bits))
static inline nir_def *get_src_def(nir_tex_instr *tex,
nir_tex_src_type src_type)
{
int src_idx = nir_tex_instr_src_index(tex, src_type);
return src_idx >= 0 ? tex->src[src_idx].src.ssa : NULL;
}
/**
* \brief Lowers a basic texture query (no sampling required).
*
* \param[in] b NIR builder.
* \param[in] tex NIR texture instruction.
* \param[in] tex_desc_set Texture descriptor set.
* \param[in] tex_binding Texture binding.
* \param[in] common Shader common data.
* \return The replacement/lowered def.
*/
static nir_def *lower_tex_query_basic(nir_builder *b,
nir_tex_instr *tex,
unsigned tex_desc_set,
unsigned tex_binding,
pco_common_data *common)
{
/* Load texture state words. */
nir_def *tex_state = nir_load_tex_state_pco(b,
ROGUE_NUM_TEXSTATE_DWORDS,
.desc_set = tex_desc_set,
.binding = tex_binding);
nir_def *tex_state_word[] = {
[0] = nir_channel(b, tex_state, 0),
[1] = nir_channel(b, tex_state, 1),
[2] = nir_channel(b, tex_state, 2),
[3] = nir_channel(b, tex_state, 3),
};
switch (tex->op) {
case nir_texop_query_levels:
return STATE_UNPACK(b, tex_state_word, 2, 0, 4);
case nir_texop_texture_samples:
return STATE_UNPACK_SHIFT(b, tex_state_word, 1, 30, 2, 1);
case nir_texop_txs: {
unsigned num_comps = tex->def.num_components;
if (tex->is_array)
--num_comps;
nir_def *size_comps[] = {
[0] = STATE_UNPACK_ADD(b, tex_state_word, 1, 2, 14, 1),
[1] = STATE_UNPACK_ADD(b, tex_state_word, 1, 16, 14, 1),
[2] = STATE_UNPACK_ADD(b, tex_state_word, 2, 4, 11, 1),
};
nir_def *base_level = STATE_UNPACK(b, tex_state_word, 3, 28, 4);
nir_def *lod = get_src_def(tex, nir_tex_src_lod);
assert(lod);
lod = nir_iadd(b, lod, base_level);
for (unsigned c = 0; c < num_comps; ++c)
size_comps[c] = nir_umax_imm(b, nir_ushr(b, size_comps[c], lod), 1);
if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D && tex->is_array)
size_comps[1] = size_comps[2];
return nir_vec(b, size_comps, tex->def.num_components);
}
default:
break;
}
UNREACHABLE("");
}
static inline enum pco_dim to_pco_dim(enum glsl_sampler_dim dim)
{
switch (dim) {
case GLSL_SAMPLER_DIM_1D:
case GLSL_SAMPLER_DIM_BUF:
return PCO_DIM_1D;
case GLSL_SAMPLER_DIM_2D:
case GLSL_SAMPLER_DIM_MS:
case GLSL_SAMPLER_DIM_SUBPASS:
case GLSL_SAMPLER_DIM_SUBPASS_MS:
return PCO_DIM_2D;
case GLSL_SAMPLER_DIM_3D:
case GLSL_SAMPLER_DIM_CUBE:
return PCO_DIM_3D;
/* case GLSL_SAMPLER_DIM_RECT: */
/* case GLSL_SAMPLER_DIM_EXTERNAL: */
default:
break;
}
UNREACHABLE("");
}
static nir_def *
lower_tex_query_lod(nir_builder *b, nir_def *coords, nir_def *smp_coeffs)
{
nir_def *lod_dval_post_clamp =
nir_channel(b, smp_coeffs, ROGUE_SMP_COEFF_LOD_DVAL_POST_CLAMP);
nir_def *lod_dval_pre_clamp =
nir_channel(b, smp_coeffs, ROGUE_SMP_COEFF_LOD_DVAL_PRE_CLAMP);
nir_def *tfrac_post_clamp =
nir_channel(b, smp_coeffs, ROGUE_SMP_COEFF_TFRAC_POST_CLAMP);
nir_def *tfrac_pre_clamp =
nir_channel(b, smp_coeffs, ROGUE_SMP_COEFF_TFRAC_PRE_CLAMP);
/* Unpack. */
lod_dval_post_clamp = nir_fmul_imm(b, lod_dval_post_clamp, 255.0f);
lod_dval_pre_clamp = nir_fmul_imm(b, lod_dval_pre_clamp, 255.0f);
tfrac_post_clamp = nir_fmul_imm(b, tfrac_post_clamp, 255.0f);
tfrac_pre_clamp = nir_fmul_imm(b, tfrac_pre_clamp, 255.0f);
/* Scale. */
tfrac_post_clamp = nir_fdiv_imm(b, tfrac_post_clamp, 256.0f);
tfrac_pre_clamp = nir_fdiv_imm(b, tfrac_pre_clamp, 256.0f);
/* Calculate coord deltas. */
nir_def *coord_deltas = nir_imm_int(b, 0);
for (unsigned c = 0; c < coords->num_components; ++c) {
nir_def *coord = nir_channel(b, coords, c);
coord_deltas = nir_fadd(b,
coord_deltas,
nir_fadd(b,
nir_fabs(b, nir_ddx(b, coord)),
nir_fabs(b, nir_ddy(b, coord))));
}
nir_def *lod_comps[2] = {
[0] = nir_fadd(b, lod_dval_post_clamp, tfrac_post_clamp),
[1] = nir_fadd(
b,
nir_fadd_imm(b, tfrac_pre_clamp, -128.0f),
nir_fcsel(b, coord_deltas, lod_dval_pre_clamp, nir_imm_float(b, 0.0f))),
};
return nir_vec(b, lod_comps, ARRAY_SIZE(lod_comps));
}
static inline unsigned process_coords(nir_builder *b,
bool is_array,
bool is_query_lod,
bool coords_are_float,
nir_def *coords,
nir_def **float_coords,
nir_def **int_coords,
nir_def **float_array_index,
nir_def **int_array_index)
{
unsigned num_comps = coords->num_components;
*float_coords = coords_are_float ? coords : nir_i2f32(b, coords);
*int_coords = !coords_are_float ? coords : nir_f2i32(b, coords);
*float_array_index = NULL;
*int_array_index = NULL;
if (!is_array || is_query_lod)
return num_comps;
*float_array_index = nir_channel(b, *float_coords, num_comps - 1);
*int_array_index = nir_channel(b, *int_coords, num_comps - 1);
*float_coords = nir_trim_vector(b, *float_coords, num_comps - 1);
*int_coords = nir_trim_vector(b, *int_coords, num_comps - 1);
return num_comps - 1;
}
static inline bool tex_src_is_float(nir_tex_instr *tex,
nir_tex_src_type src_type)
{
int src_idx = nir_tex_instr_src_index(tex, src_type);
assert(src_idx >= 0);
return nir_tex_instr_src_type(tex, src_idx) == nir_type_float;
}
/* 40-bit address, shifted right by two: */
static inline void unpack_base_addr(nir_builder *b,
nir_def *tex_state_word[static 4],
nir_def **base_addr_lo,
nir_def **base_addr_hi)
{
*base_addr_lo = nir_imm_int(b, 0);
/* addr_lo[17..2] */
nir_def *lo_17_2 = STATE_UNPACK(b, tex_state_word, 2, 16, 16);
*base_addr_lo = nir_bitfield_insert_imm(b, *base_addr_lo, lo_17_2, 2, 16);
/* addr_lo[31..18] */
nir_def *lo_31_18 = STATE_UNPACK(b, tex_state_word, 3, 0, 14);
*base_addr_lo = nir_bitfield_insert_imm(b, *base_addr_lo, lo_31_18, 18, 14);
/* addr_hi[7..0] */
*base_addr_hi = STATE_UNPACK(b, tex_state_word, 3, 14, 8);
}
/**
* \brief Lowers a texture instruction.
*
* \param[in] b NIR builder.
* \param[in] instr NIR instruction.
* \param[in] cb_data User callback data.
* \return The replacement/lowered def.
*/
static nir_def *lower_tex(nir_builder *b, nir_instr *instr, void *cb_data)
{
nir_tex_instr *tex = nir_instr_as_tex(instr);
pco_common_data *common = cb_data;
unsigned tex_desc_set;
unsigned tex_binding;
pco_unpack_desc(tex->texture_index, &tex_desc_set, &tex_binding);
unsigned smp_desc_set;
unsigned smp_binding;
pco_unpack_desc(tex->sampler_index, &smp_desc_set, &smp_binding);
bool hw_array_support = false;
bool hw_int_support = false;
b->cursor = nir_before_instr(instr);
if (nir_tex_instr_is_query(tex) && tex->op != nir_texop_lod)
return lower_tex_query_basic(b, tex, tex_desc_set, tex_binding, common);
nir_def *tex_state = nir_load_tex_state_pco(b,
ROGUE_NUM_TEXSTATE_DWORDS,
.desc_set = tex_desc_set,
.binding = tex_binding);
nir_def *smp_state = nir_load_smp_state_pco(b,
ROGUE_NUM_TEXSTATE_DWORDS,
.desc_set = smp_desc_set,
.binding = smp_binding);
/* Process tex sources, build up the smp flags and data words. */
BITSET_DECLARE(tex_src_set, nir_num_tex_src_types) = { 0 };
nir_def *tex_srcs[nir_num_tex_src_types];
nir_def *smp_data_comps[NIR_MAX_VEC_COMPONENTS];
unsigned smp_data_comp_count = 0;
pco_smp_flags smp_flags = {
.dim = to_pco_dim(tex->sampler_dim),
.lod_mode = PCO_LOD_MODE_NORMAL,
};
for (unsigned s = 0; s < nir_num_tex_src_types; ++s)
if ((tex_srcs[s] = get_src_def(tex, s)) != NULL)
BITSET_SET(tex_src_set, s);
nir_def *float_coords;
nir_def *int_coords;
nir_def *float_array_index;
nir_def *int_array_index;
unsigned num_coord_comps =
process_coords(b,
tex->is_array,
tex->op == nir_texop_lod,
tex_src_is_float(tex, nir_tex_src_coord),
tex_srcs[nir_tex_src_coord],
&float_coords,
&int_coords,
&float_array_index,
&int_array_index);
bool use_int_coords = !tex_src_is_float(tex, nir_tex_src_coord) &&
hw_int_support;
assert(BITSET_TEST(tex_src_set, nir_tex_src_coord));
if (BITSET_TEST(tex_src_set, nir_tex_src_coord)) {
for (unsigned c = 0; c < num_coord_comps; ++c) {
smp_data_comps[smp_data_comp_count++] =
nir_channel(b, use_int_coords ? int_coords : float_coords, c);
}
BITSET_CLEAR(tex_src_set, nir_tex_src_coord);
}
nir_def *proj = NULL;
if (BITSET_TEST(tex_src_set, nir_tex_src_projector)) {
assert(tex_src_is_float(tex, nir_tex_src_projector));
proj = tex_srcs[nir_tex_src_projector];
smp_data_comps[smp_data_comp_count++] =
use_int_coords ? nir_f2i32(b, proj) : proj;
smp_flags.proj = true;
BITSET_CLEAR(tex_src_set, nir_tex_src_projector);
}
if (hw_array_support && int_array_index) {
smp_data_comps[smp_data_comp_count++] =
use_int_coords ? int_array_index : float_array_index;
smp_flags.array = true;
}
assert((BITSET_TEST(tex_src_set, nir_tex_src_bias) +
BITSET_TEST(tex_src_set, nir_tex_src_lod) +
BITSET_TEST(tex_src_set, nir_tex_src_ddx)) < 2);
bool lod_set = false;
if (BITSET_TEST(tex_src_set, nir_tex_src_bias)) {
nir_def *lod = tex_srcs[nir_tex_src_bias];
if (!tex_src_is_float(tex, nir_tex_src_bias))
lod = nir_i2f32(b, lod);
smp_data_comps[smp_data_comp_count++] = lod;
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_BIAS;
lod_set = true;
BITSET_CLEAR(tex_src_set, nir_tex_src_bias);
} else if (BITSET_TEST(tex_src_set, nir_tex_src_lod)) {
nir_def *lod = tex_srcs[nir_tex_src_lod];
if (!tex_src_is_float(tex, nir_tex_src_lod))
lod = nir_i2f32(b, lod);
smp_data_comps[smp_data_comp_count++] = lod;
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_REPLACE;
lod_set = true;
BITSET_CLEAR(tex_src_set, nir_tex_src_lod);
} else if (BITSET_TEST(tex_src_set, nir_tex_src_ddx)) {
assert(BITSET_TEST(tex_src_set, nir_tex_src_ddy));
assert(tex_src_is_float(tex, nir_tex_src_ddx) &&
tex_src_is_float(tex, nir_tex_src_ddy));
nir_def *ddx = tex_srcs[nir_tex_src_ddx];
nir_def *ddy = tex_srcs[nir_tex_src_ddy];
for (unsigned c = 0; c < ddx->num_components; ++c) {
smp_data_comps[smp_data_comp_count++] = nir_channel(b, ddx, c);
smp_data_comps[smp_data_comp_count++] = nir_channel(b, ddy, c);
}
smp_flags.lod_mode = PCO_LOD_MODE_GRADIENTS;
lod_set = true;
BITSET_CLEAR(tex_src_set, nir_tex_src_ddx);
BITSET_CLEAR(tex_src_set, nir_tex_src_ddy);
}
if (!hw_array_support && int_array_index) {
/* Set a per-pixel lod bias of 0 if none has been set yet. */
if (!lod_set) {
smp_data_comps[smp_data_comp_count++] = nir_imm_int(b, 0);
smp_flags.pplod = true;
smp_flags.lod_mode = PCO_LOD_MODE_BIAS;
lod_set = true;
}
nir_def *tex_state_word[] = {
[0] = nir_channel(b, tex_state, 0),
[1] = nir_channel(b, tex_state, 1),
[2] = nir_channel(b, tex_state, 2),
[3] = nir_channel(b, tex_state, 3),
};
nir_def *base_addr_lo;
nir_def *base_addr_hi;
unpack_base_addr(b, tex_state_word, &base_addr_lo, &base_addr_hi);
nir_def *array_index = int_array_index;
assert(array_index);
nir_def *array_max = STATE_UNPACK(b, tex_state_word, 2, 4, 11);
array_index = nir_uclamp(b, array_index, nir_imm_int(b, 0), array_max);
nir_def *tex_meta = nir_load_tex_meta_pco(b,
PCO_IMAGE_META_COUNT,
.desc_set = tex_desc_set,
.binding = tex_binding);
nir_def *array_stride =
nir_channel(b, tex_meta, PCO_IMAGE_META_LAYER_SIZE);
nir_def *array_offset = nir_imul(b, array_index, array_stride);
nir_def *addr =
nir_uadd64_32(b, base_addr_lo, base_addr_hi, array_offset);
smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 0);
smp_data_comps[smp_data_comp_count++] = nir_channel(b, addr, 1);
smp_flags.tao = true;
}
if (BITSET_TEST(tex_src_set, nir_tex_src_offset) ||
BITSET_TEST(tex_src_set, nir_tex_src_ms_index)) {
nir_def *lookup = nir_imm_int(b, 0);
if (BITSET_TEST(tex_src_set, nir_tex_src_offset)) {
nir_def *offset = tex_srcs[nir_tex_src_offset];
const unsigned packed_offset_start[] = { 0, 6, 12 };
const unsigned packed_offset_size[] = { 6, 6, 4 };
for (unsigned c = 0; c < offset->num_components; ++c) {
lookup = nir_bitfield_insert(b,
lookup,
nir_channel(b, offset, c),
nir_imm_int(b, packed_offset_start[c]),
nir_imm_int(b, packed_offset_size[c]));
}
smp_flags.soo = true;
BITSET_CLEAR(tex_src_set, nir_tex_src_offset);
}
if (BITSET_TEST(tex_src_set, nir_tex_src_ms_index)) {
lookup = nir_bitfield_insert(b,
lookup,
tex_srcs[nir_tex_src_ms_index],
nir_imm_int(b, 16),
nir_imm_int(b, 3));
smp_flags.sno = true;
BITSET_CLEAR(tex_src_set, nir_tex_src_ms_index);
}
smp_data_comps[smp_data_comp_count++] = lookup;
}
/* Shadow comparator. */
nir_def *comparator = NULL;
if (BITSET_TEST(tex_src_set, nir_tex_src_comparator)) {
comparator = tex_srcs[nir_tex_src_comparator];
if (proj)
comparator = nir_fdiv(b, comparator, proj);
BITSET_CLEAR(tex_src_set, nir_tex_src_comparator);
}
assert(BITSET_IS_EMPTY(tex_src_set));
/* Pad out the rest of the data words. */
assert(smp_data_comp_count <= NIR_MAX_VEC_COMPONENTS);
for (unsigned c = smp_data_comp_count; c < ARRAY_SIZE(smp_data_comps); ++c)
smp_data_comps[c] = nir_imm_int(b, 0);
nir_def *smp_data = nir_vec(b, smp_data_comps, ARRAY_SIZE(smp_data_comps));
nir_def *result;
switch (tex->op) {
case nir_texop_lod:
result = nir_smp_coeffs_pco(b,
smp_data,
tex_state,
smp_state,
.smp_flags_pco = smp_flags._,
.range = smp_data_comp_count);
result = lower_tex_query_lod(b, float_coords, result);
break;
case nir_texop_txf:
case nir_texop_txf_ms:
smp_flags.nncoords = true;
FALLTHROUGH;
case nir_texop_tex:
case nir_texop_txb:
case nir_texop_txd:
case nir_texop_txl:
smp_flags.integer = use_int_coords;
smp_flags.fcnorm = nir_alu_type_get_base_type(tex->dest_type) ==
nir_type_float;
result = nir_smp_pco(b,
tex->def.num_components,
smp_data,
tex_state,
smp_state,
.smp_flags_pco = smp_flags._,
.range = smp_data_comp_count);
break;
default:
UNREACHABLE("");
}
if (tex->is_shadow) {
assert(result->num_components == 1);
nir_def *compare_op =
nir_load_smp_meta_pco(b,
1,
.desc_set = smp_desc_set,
.binding = smp_binding,
.component = PCO_SAMPLER_META_COMPARE_OP);
result = nir_alphatst_pco(b, result, comparator, compare_op);
}
return result;
}
/**
* \brief Filters texture instructions.
*
* \param[in] instr NIR instruction.
* \param[in] cb_data User callback data.
* \return True if the instruction matches the filter.
*/
static bool is_tex(const nir_instr *instr, UNUSED const void *cb_data)
{
return instr->type == nir_instr_type_tex;
}
/**
* \brief Texture lowering pass.
*
* \param[in,out] shader NIR shader.
* \param[in] common Shader common data.
* \return True if the pass made progress.
*/
bool pco_nir_lower_tex(nir_shader *shader, pco_common_data *common)
{
return nir_shader_lower_instructions(shader, is_tex, lower_tex, common);
}

View file

@ -62,6 +62,45 @@ static nir_def *lower_load_vulkan_descriptor(nir_builder *b,
return nir_imm_ivec3(b, desc_set_binding, elem, 0);
}
static void lower_tex_deref_to_binding(nir_tex_instr *tex,
unsigned deref_index,
pco_common_data *common)
{
nir_tex_src *deref_src = &tex->src[deref_index];
nir_deref_instr *deref =
nir_instr_as_deref(deref_src->src.ssa->parent_instr);
assert(deref->deref_type == nir_deref_type_var);
/* TODO: array support */
unsigned desc_set = deref->var->data.descriptor_set;
unsigned binding = deref->var->data.binding;
set_resource_used(common, desc_set, binding);
uint32_t desc_set_binding = pco_pack_desc(desc_set, binding);
if (deref_src->src_type == nir_tex_src_texture_deref)
tex->texture_index = desc_set_binding;
else
tex->sampler_index = desc_set_binding;
nir_tex_instr_remove_src(tex, deref_index);
}
static inline void lower_tex_derefs(nir_tex_instr *tex, pco_common_data *common)
{
int deref_index;
deref_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
if (deref_index >= 0)
lower_tex_deref_to_binding(tex, deref_index, common);
deref_index = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
if (deref_index >= 0)
lower_tex_deref_to_binding(tex, deref_index, common);
}
/**
* \brief Lowers a Vulkan-related instruction.
*
@ -73,11 +112,26 @@ static nir_def *lower_load_vulkan_descriptor(nir_builder *b,
static nir_def *lower_vk(nir_builder *b, nir_instr *instr, void *cb_data)
{
pco_common_data *common = cb_data;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_vulkan_descriptor:
return lower_load_vulkan_descriptor(b, intr, common);
switch (instr->type) {
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_vulkan_descriptor:
return lower_load_vulkan_descriptor(b, intr, common);
default:
break;
}
break;
}
case nir_instr_type_tex: {
nir_tex_instr *tex = nir_instr_as_tex(instr);
lower_tex_derefs(tex, common);
return NIR_LOWER_INSTR_PROGRESS;
}
default:
break;
@ -95,13 +149,29 @@ static nir_def *lower_vk(nir_builder *b, nir_instr *instr, void *cb_data)
*/
static bool is_vk(const nir_instr *instr, UNUSED const void *cb_data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
switch (instr->type) {
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_vulkan_descriptor:
return true;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_vulkan_descriptor:
return true;
default:
break;
}
break;
}
case nir_instr_type_tex: {
nir_tex_instr *tex = nir_instr_as_tex(instr);
if (nir_tex_instr_src_index(tex, nir_tex_src_texture_deref) >= 0 ||
nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) >= 0) {
return true;
}
FALLTHROUGH;
}
default:
break;

View file

@ -171,7 +171,11 @@ OM_SIGNPOS = op_mod_enum('signpos', [
'mtb',
'ftb',
])
OM_DIM = op_mod('dim', BaseType.uint)
OM_DIM = op_mod_enum('dim', [
'1d',
'2d',
'3d',
])
OM_PROJ = op_mod('proj', BaseType.bool)
OM_FCNORM = op_mod('fcnorm', BaseType.bool)
OM_NNCOORDS = op_mod('nncoords', BaseType.bool)
@ -179,22 +183,24 @@ OM_LOD_MODE = op_mod_enum('lod_mode', [
('normal', ''),
('bias', 'bias'),
('replace', 'replace'),
('gradient', 'gradient'),
('gradients', 'gradients'),
])
OM_PPLOD = op_mod('pplod', BaseType.bool)
OM_TAO = op_mod('tao', BaseType.bool)
OM_SOO = op_mod('soo', BaseType.bool)
OM_SNO = op_mod('sno', BaseType.bool)
OM_WRT = op_mod('wrt', BaseType.bool)
OM_SB_MODE = op_mod_enum('sb_mode', [
('none', ''),
('data', 'data'),
('info', 'info'),
('rawdata', 'rawdata'),
('coeffs', 'coeffs'),
('both', 'both'),
])
OM_ARRAY = op_mod('array', BaseType.bool)
OM_INTEGER = op_mod('integer', BaseType.bool)
OM_SCHEDSWAP = op_mod('schedswap', BaseType.bool)
OM_SCHEDSWAP = op_mod_enum('schedswap', [
('none', ''),
('swap', 'schedswap'),
])
OM_F16 = op_mod('f16', BaseType.bool)
OM_TILED = op_mod('tiled', BaseType.bool)
OM_FREEP = op_mod('freep', BaseType.bool)
@ -268,9 +274,9 @@ OM_ITR_MODE = op_mod_enum('itr_mode', [
'centroid',
])
OM_SCHED = op_mod_enum('sched', [
'none',
'swap',
'wdf',
('none', ''),
('swap', 'schedswap'),
('wdf', 'schedwdf'),
])
OM_ATOM = op_mod('atom', BaseType.bool, unset=True)
OM_OLCHK = op_mod('olchk', BaseType.bool, unset=True)
@ -363,6 +369,13 @@ O_LD = hw_op('ld', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_LD], 1, 3)
O_ST = hw_direct_op('st', [OM_MCU_CACHE_MODE_ST], 0, 6)
O_ATOMIC = hw_op('atomic', [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM_OP], 1, 2)
O_SMP = hw_op('smp', OM_ALU_RPT1 + [OM_DIM, OM_PROJ, OM_FCNORM, OM_NNCOORDS,
OM_LOD_MODE, OM_PPLOD, OM_TAO, OM_SOO,
OM_SNO, OM_SB_MODE, OM_MCU_CACHE_MODE_LD,
OM_ARRAY, OM_INTEGER, OM_SCHEDSWAP, OM_F16], 1, 6)
O_ALPHATST = hw_op('alphatst', OM_ALU_RPT1, 1, 4)
## Bitwise.
O_MOVI32 = hw_op('movi32', OM_ALU, 1, 1)
@ -406,6 +419,7 @@ O_SCMP = hw_op('scmp', OM_ALU + [OM_TST_OP_MAIN], 1, 2, [], [[RM_ABS, RM_NEG], [
O_BCMP = hw_op('bcmp', OM_ALU + [OM_TST_OP_MAIN, OM_TST_TYPE_MAIN], 1, 2, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
O_BCSEL = hw_op('bcsel', OM_ALU, 1, 3, [], [[], [RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
O_CSEL = hw_op('csel', OM_ALU + [OM_TST_OP_MAIN, OM_TST_TYPE_MAIN], 1, 3, [], [[], [RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
O_PSEL = hw_op('psel', OM_ALU, 1, 3)
O_PSEL_TRIG = hw_op('psel_trig', OM_ALU, 1, 3)
O_FSIGN = hw_op('fsign', OM_ALU, 1, 1)
O_ISIGN = hw_op('isign', OM_ALU, 1, 1)

View file

@ -423,13 +423,11 @@ trans_store_output_fs(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref src)
}
static unsigned fetch_resource_base_reg(const pco_common_data *common,
uint32_t packed_desc,
unsigned elem)
unsigned desc_set,
unsigned binding,
unsigned elem,
bool *is_img_smp)
{
unsigned desc_set;
unsigned binding;
pco_unpack_desc(packed_desc, &desc_set, &binding);
assert(desc_set < ARRAY_SIZE(common->desc_sets));
const pco_descriptor_set_data *desc_set_data = &common->desc_sets[desc_set];
assert(desc_set_data->used);
@ -438,14 +436,27 @@ static unsigned fetch_resource_base_reg(const pco_common_data *common,
const pco_binding_data *binding_data = &desc_set_data->bindings[binding];
assert(binding_data->used);
if (is_img_smp)
*is_img_smp = binding_data->is_img_smp;
unsigned reg_offset = elem * binding_data->range.stride;
assert(reg_offset < binding_data->range.count);
unsigned reg_index = binding_data->range.start + reg_offset;
return reg_index;
}
static unsigned fetch_resource_base_reg_packed(const pco_common_data *common,
uint32_t packed_desc,
unsigned elem,
bool *is_img_smp)
{
unsigned desc_set;
unsigned binding;
pco_unpack_desc(packed_desc, &desc_set, &binding);
return fetch_resource_base_reg(common, desc_set, binding, elem, is_img_smp);
}
static pco_instr *trans_load_buffer(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
@ -459,7 +470,8 @@ static pco_instr *trans_load_buffer(trans_ctx *tctx,
uint32_t packed_desc = nir_src_comp_as_uint(intr->src[0], 0);
unsigned elem = nir_src_comp_as_uint(intr->src[0], 1);
unsigned sh_index = fetch_resource_base_reg(common, packed_desc, elem);
unsigned sh_index =
fetch_resource_base_reg_packed(common, packed_desc, elem, NULL);
pco_ref base_addr[2];
pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr);
@ -498,7 +510,8 @@ static pco_instr *trans_store_buffer(trans_ctx *tctx,
uint32_t packed_desc = nir_src_comp_as_uint(intr->src[1], 0);
unsigned elem = nir_src_comp_as_uint(intr->src[1], 1);
unsigned sh_index = fetch_resource_base_reg(common, packed_desc, elem);
unsigned sh_index =
fetch_resource_base_reg_packed(common, packed_desc, elem, NULL);
pco_ref base_addr[2];
pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr);
@ -593,7 +606,8 @@ static pco_instr *trans_atomic_buffer(trans_ctx *tctx,
uint32_t packed_desc = nir_src_comp_as_uint(intr->src[0], 0);
unsigned elem = nir_src_comp_as_uint(intr->src[0], 1);
unsigned sh_index = fetch_resource_base_reg(common, packed_desc, elem);
unsigned sh_index =
fetch_resource_base_reg_packed(common, packed_desc, elem, NULL);
pco_ref base_addr[2];
pco_ref_hwreg_addr_comps(sh_index, PCO_REG_CLASS_SHARED, base_addr);
@ -681,6 +695,162 @@ trans_load_sysval(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref dest)
return pco_mov(&tctx->b, dest, src, .rpt = chans);
}
static bool desc_set_binding_is_comb_img_smp(unsigned desc_set,
unsigned binding,
const pco_common_data *common)
{
const pco_descriptor_set_data *desc_set_data = &common->desc_sets[desc_set];
assert(desc_set_data->used);
assert(desc_set_data->bindings && binding < desc_set_data->binding_count);
const pco_binding_data *binding_data = &desc_set_data->bindings[binding];
assert(binding_data->used);
return binding_data->is_img_smp;
}
static pco_instr *lower_load_tex_smp_state(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
bool smp)
{
unsigned desc_set = nir_intrinsic_desc_set(intr);
unsigned binding = nir_intrinsic_binding(intr);
unsigned start_comp = nir_intrinsic_component(intr);
unsigned chans = pco_ref_get_chans(dest);
assert(start_comp + chans <= ROGUE_NUM_TEXSTATE_DWORDS);
/* TODO: array support. */
const pco_common_data *common = &tctx->shader->data.common;
bool is_img_smp;
unsigned sh_index =
fetch_resource_base_reg(common, desc_set, binding, 0, &is_img_smp);
pco_ref state_words =
pco_ref_hwreg_vec(sh_index, PCO_REG_CLASS_SHARED, chans);
/* Sampler state comes after image state and metadata in combined image
* samplers.
*/
if (smp && is_img_smp) {
state_words = pco_ref_offset(state_words, ROGUE_NUM_TEXSTATE_DWORDS);
state_words = pco_ref_offset(state_words, PCO_IMAGE_META_COUNT);
}
state_words = pco_ref_offset(state_words, start_comp);
return pco_mov(&tctx->b, dest, state_words, .rpt = chans);
}
static pco_instr *lower_load_tex_smp_meta(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
bool smp)
{
unsigned desc_set = nir_intrinsic_desc_set(intr);
unsigned binding = nir_intrinsic_binding(intr);
unsigned start_comp = nir_intrinsic_component(intr);
unsigned chans = pco_ref_get_chans(dest);
/* TODO: array support. */
const pco_common_data *common = &tctx->shader->data.common;
bool is_img_smp;
unsigned sh_index =
fetch_resource_base_reg(common, desc_set, binding, 0, &is_img_smp);
pco_ref state_words =
pco_ref_hwreg_vec(sh_index, PCO_REG_CLASS_SHARED, chans);
assert(start_comp + chans <=
(smp ? PCO_SAMPLER_META_COUNT : PCO_IMAGE_META_COUNT));
state_words = pco_ref_offset(state_words, ROGUE_NUM_TEXSTATE_DWORDS);
if (smp && is_img_smp) {
state_words = pco_ref_offset(state_words, PCO_IMAGE_META_COUNT);
state_words = pco_ref_offset(state_words, ROGUE_NUM_TEXSTATE_DWORDS);
}
state_words = pco_ref_offset(state_words, start_comp);
return pco_mov(&tctx->b, dest, state_words, .rpt = chans);
}
static pco_instr *lower_smp(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref *dest,
pco_ref data,
pco_ref tex_state,
pco_ref smp_state)
{
pco_smp_flags smp_flags = { ._ = nir_intrinsic_smp_flags_pco(intr) };
unsigned data_comps = nir_intrinsic_range(intr);
data = pco_ref_chans(data, data_comps);
unsigned chans = pco_ref_get_chans(*dest);
enum pco_sb_mode sb_mode = PCO_SB_MODE_NONE;
switch (intr->intrinsic) {
case nir_intrinsic_smp_coeffs_pco:
/* Shrink the destination to its actual size. */
*dest = pco_ref_chans(*dest, ROGUE_SMP_COEFF_COUNT);
chans = 1; /* Chans must be 1 for coeff mode. */
sb_mode = PCO_SB_MODE_COEFFS;
break;
case nir_intrinsic_smp_pco:
/* Destination and chans should be correct. */
break;
default:
UNREACHABLE("");
}
pco_ref shared_lod = pco_ref_null();
pco_instr *smp = pco_smp(&tctx->b,
*dest,
pco_ref_drc(PCO_DRC_0),
tex_state,
data,
smp_state,
shared_lod,
pco_ref_imm8(chans),
.dim = smp_flags.dim,
.proj = smp_flags.proj,
.fcnorm = smp_flags.fcnorm,
.nncoords = smp_flags.nncoords,
.lod_mode = smp_flags.lod_mode,
.pplod = smp_flags.pplod,
.tao = smp_flags.tao,
.soo = smp_flags.soo,
.sno = smp_flags.sno,
.sb_mode = sb_mode,
.array = smp_flags.array,
.integer = smp_flags.integer);
return smp;
}
static pco_instr *lower_alphatst(trans_ctx *tctx,
pco_ref dest,
pco_ref src0,
pco_ref src1,
pco_ref src2)
{
pco_alphatst(&tctx->b,
pco_ref_pred(PCO_PRED_P0),
pco_ref_drc(PCO_DRC_0),
src0,
src1,
src2);
return pco_psel(&tctx->b,
dest,
pco_ref_pred(PCO_PRED_P0),
pco_fone,
pco_zero);
}
/**
* \brief Translates a NIR intrinsic instruction into PCO.
*
@ -762,6 +932,32 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
: pco_fdsy(&tctx->b, dest, src[0]);
break;
/* Texture-related intrinsics. */
case nir_intrinsic_load_tex_state_pco:
instr = lower_load_tex_smp_state(tctx, intr, dest, false);
break;
case nir_intrinsic_load_smp_state_pco:
instr = lower_load_tex_smp_state(tctx, intr, dest, true);
break;
case nir_intrinsic_load_tex_meta_pco:
instr = lower_load_tex_smp_meta(tctx, intr, dest, false);
break;
case nir_intrinsic_load_smp_meta_pco:
instr = lower_load_tex_smp_meta(tctx, intr, dest, true);
break;
case nir_intrinsic_smp_coeffs_pco:
case nir_intrinsic_smp_pco:
instr = lower_smp(tctx, intr, &dest, src[0], src[1], src[2]);
break;
case nir_intrinsic_alphatst_pco:
instr = lower_alphatst(tctx, dest, src[0], src[1], src[2]);
break;
default:
printf("Unsupported intrinsic: \"");
nir_print_instr(&intr->instr, stdout);