pvr, pco: temporarily add legacy tq shader gen code

The smp emission code will be moved back to pco_nir_tex following the
addition of the updated tq shader gen code.

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta 2025-02-20 15:47:00 +00:00 committed by Marge Bot
parent 61f8b57b1a
commit 3aacb6731c
8 changed files with 886 additions and 47 deletions

View file

@ -53,4 +53,41 @@ void pco_validate_shader(pco_shader *shader, const char *when);
void pco_print_shader(pco_shader *shader, FILE *fp, const char *when);
void pco_print_binary(pco_shader *shader, FILE *fp, const char *when);
#include "compiler/nir/nir_builder.h"
/**
 * Parameters for building a PCO NIR texture sample (smp) operation.
 *
 * Filled in by callers and consumed by pco_emit_nir_smp(); fields left
 * NULL/false/0 are treated as absent by the emitter.
 */
typedef struct _pco_smp_params {
   nir_def *tex_state; /**< Texture state words. */
   nir_def *smp_state; /**< Sampler state words. */

   nir_alu_type dest_type; /**< ALU type of the sample result. */
   enum glsl_sampler_dim sampler_dim; /**< Sampler dimensionality. */

   /* NOTE(review): presumably "non-normalized coordinates" - confirm. */
   bool nncoords;
   nir_def *coords; /**< Texture coordinates. */
   nir_def *array_index; /**< Array layer index (optional). */
   nir_def *proj; /**< Projection divisor (optional). */

   nir_def *lod_bias; /**< LOD bias (optional). */
   nir_def *lod_replace; /**< Explicit LOD override (optional). */
   nir_def *lod_ddx; /**< Explicit x derivatives (optional). */
   nir_def *lod_ddy; /**< Explicit y derivatives (optional). */

   nir_def *addr_lo; /**< Address low words (optional). */
   nir_def *addr_hi; /**< Address high words (optional). */

   nir_def *offset; /**< Texel offset (optional). */
   nir_def *ms_index; /**< Multisample index (optional). */

   nir_def *write_data; /**< Data to write, for writing samples (optional). */

   bool sample_coeffs; /**< Sample coefficients rather than data. */
   bool sample_raw; /**< Raw sampling mode. */
   unsigned sample_components; /**< Number of components to sample. */
   bool int_mode; /**< Integer sampling mode. */
} pco_smp_params;

/** Emits the smp intrinsic described by *params; returns the new intrinsic. */
nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b, pco_smp_params *params);
#endif /* PCO_H */

View file

@ -137,6 +137,9 @@ void pco_encode_ir(pco_ctx *ctx, pco_shader *shader)
*/
/**
 * Returns the size, in bytes, of a shader's compiled binary.
 *
 * \param shader The shader (may be NULL).
 * \return The binary size, or 0 if \p shader is NULL.
 */
unsigned pco_shader_binary_size(pco_shader *shader)
{
   return shader ? shader->binary.size : 0;
}
@ -148,5 +151,7 @@ unsigned pco_shader_binary_size(pco_shader *shader)
*/
/**
 * Returns a pointer to a shader's compiled binary data.
 *
 * \param shader The shader (may be NULL).
 * \return The binary data, or NULL if \p shader is NULL.
 */
const void *pco_shader_binary_data(pco_shader *shader)
{
   return shader ? shader->binary.data : NULL;
}

View file

@ -226,42 +226,7 @@ static inline void unpack_base_addr(nir_builder *b,
*base_addr_hi = STATE_UNPACK(b, tex_state_word, 3, 14, 8);
}
typedef struct _pco_smp_params {
nir_def *tex_state;
nir_def *smp_state;
nir_alu_type dest_type;
enum glsl_sampler_dim sampler_dim;
bool nncoords;
nir_def *coords;
nir_def *array_index;
nir_def *proj;
nir_def *lod_bias;
nir_def *lod_replace;
nir_def *lod_ddx;
nir_def *lod_ddy;
nir_def *addr_lo;
nir_def *addr_hi;
nir_def *offset;
nir_def *ms_index;
nir_def *write_data;
bool sample_coeffs;
bool sample_raw;
unsigned sample_components;
bool int_mode;
} pco_smp_params;
static nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b,
pco_smp_params *params)
nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b, pco_smp_params *params)
{
nir_def *comps[NIR_MAX_VEC_COMPONENTS];
unsigned count = 0;
@ -427,9 +392,11 @@ static nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b,
assert(!params->sample_coeffs);
assert(!params->sample_raw);
assert(params->sample_components > 0);
assert(!params->write_data);
if (!params->sample_components)
params->sample_components = 4;
nir_def *def = nir_smp_pco(b,
params->sample_components,
smp_data,

View file

@ -472,8 +472,14 @@ static bool pco_ra_func(pco_func *func,
pco_ref_xfer_mods(&src, psrc, false);
if (!pco_refs_are_equal(src, dest, true))
pco_mbyp(&b, dest, src, .exec_cnd = exec_cnd);
if (!pco_refs_are_equal(src, dest, true)) {
if (pco_ref_is_reg(src) &&
pco_ref_get_reg_class(src) == PCO_REG_CLASS_SPEC) {
pco_movs1(&b, dest, src, .exec_cnd = exec_cnd);
} else {
pco_mbyp(&b, dest, src, .exec_cnd = exec_cnd);
}
}
}
temps = MAX2(temps, temp_dest_base + offset + chans);

View file

@ -1166,6 +1166,15 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
instr = trans_load_output_fs(tctx, intr, dest);
break;
case nir_intrinsic_load_preamble:
instr = pco_mov(&tctx->b,
dest,
pco_ref_hwreg_vec(nir_intrinsic_base(intr),
PCO_REG_CLASS_SHARED,
pco_ref_get_chans(dest)),
.rpt = pco_ref_get_chans(dest));
break;
case nir_intrinsic_load_push_constant:
instr =
trans_load_common_store(tctx,

View file

@ -35,6 +35,7 @@
#include "pvr_private.h"
#include "pvr_transfer_frag_store.h"
#include "pvr_types.h"
#include "pvr_usc.h"
#include "usc/pvr_uscgen.h"
#include "util/hash_table.h"
#include "util/macros.h"
@ -187,7 +188,6 @@ static VkResult pvr_transfer_frag_store_entry_data_compile(
struct pvr_tq_frag_sh_reg_layout *sh_reg_layout = &entry_data->sh_reg_layout;
uint32_t next_free_sh_reg = 0;
struct util_dynarray shader;
VkResult result;
/* TODO: Allocate all combined image samplers if needed? Otherwise change the
@ -208,17 +208,19 @@ static VkResult pvr_transfer_frag_store_entry_data_compile(
sh_reg_layout->driver_total = next_free_sh_reg;
pvr_uscgen_tq_frag(shader_props,
&entry_data->sh_reg_layout,
num_usc_temps_out,
&shader);
pco_shader *tq =
pvr_uscgen_tq(device->pdevice->pco_ctx, shader_props, sh_reg_layout);
*num_usc_temps_out = pco_shader_data(tq)->common.temps;
result = pvr_gpu_upload_usc(device,
util_dynarray_begin(&shader),
util_dynarray_num_elements(&shader, uint8_t),
pco_shader_binary_data(tq),
pco_shader_binary_size(tq),
cache_line_size,
&entry_data->usc_upload);
util_dynarray_fini(&shader);
ralloc_free(tq);
if (result != VK_SUCCESS)
return result;

View file

@ -12,12 +12,19 @@
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_format_convert.h"
#include "nir/nir_conversion_builder.h"
#include "pco/pco.h"
#include "pco/pco_data.h"
#include "pco_uscgen_programs.h"
#include "pvr_common.h"
#include "pvr_formats.h"
#include "pvr_usc.h"
#include "usc/pvr_uscgen.h"
#include "util/macros.h"
#define PVR_MAX_SAMPLE_COUNT 8
/**
* Common function to build a NIR shader and export the binary.
*
@ -84,3 +91,804 @@ pco_shader *pvr_usc_tq(pco_ctx *ctx, struct pvr_tq_props *props)
{
UNREACHABLE("finishme: pvr_usc_tq");
}
/**
 * Whether a transfer pixel format requires a packing step.
 *
 * The raw, full-width float, and depth/stencil layouts listed below are
 * emitted as-is; every other format must be packed first.
 *
 * \param format The transfer pixel format.
 * \return true if the format needs packing.
 */
static bool needs_packing(enum pvr_transfer_pbe_pixel_src format)
{
   switch (format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
   case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
   case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
      return false;

   default:
      return true;
   }
}
/**
 * Whether a transfer pixel format requires a conversion step after the
 * texel load (depth/stencil repacks, merges, and YUV layouts).
 *
 * \param format The transfer pixel format.
 * \return true if the format needs conversion.
 */
static bool needs_conversion(enum pvr_transfer_pbe_pixel_src format)
{
   switch (format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
   case PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED:
   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
   case PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED:
      return true;

   default:
      return false;
   }
}
/**
 * Derives the source and destination signedness of an integer transfer
 * format.
 *
 * The format name encodes the pair: the first letter is the source
 * signedness and the second the destination (e.g. US8888 = unsigned
 * source, signed destination).
 *
 * \param format Integer transfer pixel format.
 * \param src Set to true iff the source values are signed.
 * \param dst Set to true iff the destination values are signed.
 */
static void
int_format_signs(enum pvr_transfer_pbe_pixel_src format, bool *src, bool *dst)
{
   /* Source signedness. */
   switch (format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
      *src = false;
      break;

   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
      *src = true;
      break;

   default:
      UNREACHABLE("Invalid format");
   }

   /* Destination signedness. */
   switch (format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
      *dst = false;
      break;

   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
      *dst = true;
      break;

   default:
      UNREACHABLE("Invalid format");
   }
}
/**
 * Shifts a packed value so that a single byte component, selected from
 * the fragment's x coordinate, lands in the low bits.
 *
 * Reads a mask and an offset from two dynamic-const shared registers
 * and computes component = (x - offset) & mask, then shifts \p src
 * right by component * 8 bits.
 *
 * \param b NIR builder.
 * \param src Packed source value.
 * \param next_sh In/out: next free dynamic-const shared register
 *        (relative to the dynamic-const base); advanced by 2.
 * \param sh_reg_layout Shared register layout.
 * \return The shifted value.
 */
static nir_def *
picked_component(nir_builder *b,
                 nir_def *src,
                 unsigned *next_sh,
                 struct pvr_tq_frag_sh_reg_layout *sh_reg_layout)
{
   unsigned base_sh = sh_reg_layout->dynamic_consts.offset;
   nir_variable *pos = nir_get_variable_with_location(b->shader,
                                                      nir_var_shader_in,
                                                      VARYING_SLOT_POS,
                                                      glsl_vec4_type());

   nir_def *coord_x = nir_f2i32(b, nir_channel(b, nir_load_var(b, pos), 0));
   nir_def *mask = nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh);
   nir_def *offset =
      nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 1);

   nir_def *comp_idx = nir_iand(b, nir_isub(b, coord_x, offset), mask);
   /* Scale the component index to a bit shift (8 bits per component). */
   nir_def *shift_val = nir_imul_imm(b, comp_idx, 8);

   *next_sh += 2;

   return nir_ushr(b, src, shift_val);
}
/**
 * Packs a vector of integer channel values into the raw register
 * layout of an integer transfer format, clamping to the destination
 * range and converting signedness where the format requires it.
 *
 * \param b NIR builder.
 * \param next_sh In/out: next free dynamic-const shared register
 *        (used only when \p pick_component is set).
 * \param sh_reg_layout Shared register layout.
 * \param pick_component Whether to select a single byte component
 *        based on the fragment's x coordinate (see picked_component()).
 * \param src Source channel vector.
 * \param format Integer transfer pixel format.
 * \return The packed value.
 */
static nir_def *pack_int_value(nir_builder *b,
                               unsigned *next_sh,
                               struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
                               bool pick_component,
                               nir_def *src,
                               enum pvr_transfer_pbe_pixel_src format)
{
   unsigned src_num_components = 4;
   const unsigned bits_8[] = { 8, 8, 8, 8 };
   const unsigned bits_10[] = { 10, 10, 10, 2 };
   const unsigned bits_16[] = { 16, 16, 16, 16 };
   const unsigned bits_32[] = { 32, 32, 32, 32 };
   const unsigned *bits;
   bool src_signed, dst_signed;

   int_format_signs(format, &src_signed, &dst_signed);

   /* Per-channel destination bit widths. */
   switch (format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
      bits = bits_8;
      break;

   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
      bits = bits_16;
      break;

   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
      bits = bits_32;
      break;

   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
      bits = bits_10;
      break;

   default:
      UNREACHABLE("Invalid format");
   }

   /* The 32-bit two-channel formats only carry two components. */
   if (format == PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32 ||
       format == PVR_TRANSFER_PBE_PIXEL_SRC_US32S32) {
      src_num_components = 2;
   }

   /* RBSWAP formats swap the red and blue channels. */
   if (format == PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102 ||
       format == PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102) {
      unsigned swiz[] = { 2, 1, 0, 3 };
      src = nir_swizzle(b, src, swiz, 4);
   }

   /* Convert source signedness to match the destination. */
   if (src_signed != dst_signed) {
      src = nir_convert_with_rounding(b,
                                      src,
                                      src_signed ? nir_type_int : nir_type_uint,
                                      dst_signed ? nir_type_int32
                                                 : nir_type_uint32,
                                      nir_rounding_mode_undef,
                                      true);
   }

   /* Clamp each channel to the destination range. */
   if (dst_signed)
      src = nir_format_clamp_sint(b, src, bits);
   else
      src = nir_format_clamp_uint(b, src, bits);

   /* Mask off sign-extension bits for narrower-than-32-bit channels. */
   if ((bits[0] < 32) && dst_signed)
      src = nir_format_mask_uvec(b, src, bits);

   if (bits != bits_16) {
      src = nir_format_pack_uint(b, src, bits, src_num_components);
   } else {
      /* 16-bit channels pack into two 32-bit words, two channels each. */
      src =
         nir_vec2(b,
                  nir_format_pack_uint(b, nir_channels(b, src, 0x3), bits, 2),
                  nir_format_pack_uint(b, nir_channels(b, src, 0xc), bits, 2));
   }

   if (!pick_component)
      return src;

   return picked_component(b, src, next_sh, sh_reg_layout);
}
/**
 * Merges one component of \p src (depth or stencil) with the current
 * framebuffer value of a depth/stencil output, fetched via framebuffer
 * fetch.
 *
 * Only PIPE_FORMAT_Z32_FLOAT_S8X24_UINT and
 * PIPE_FORMAT_Z24_UNORM_S8_UINT are supported.
 *
 * \param b NIR builder.
 * \param src New value to merge in.
 * \param format The combined depth/stencil format.
 * \param merge_depth true to take depth from \p src and stencil from
 *        the framebuffer; false for the opposite.
 * \param load_idx Render-target index to fetch (FRAG_RESULT_DATA0 +
 *        load_idx).
 * \return The merged value.
 */
static nir_def *merge_depth_stencil(nir_builder *b,
                                    nir_def *src,
                                    enum pipe_format format,
                                    bool merge_depth,
                                    unsigned load_idx)
{
   nir_def *dst;
   unsigned mask;

   assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
          format == PIPE_FORMAT_Z24_UNORM_S8_UINT);

   /* Fetch the current output value (2 words for Z32_S8X24, 1 for
    * Z24S8). */
   dst = nir_load_output(b,
                         format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ? 2 : 1,
                         32,
                         nir_imm_int(b, 0),
                         .base = 0,
                         .dest_type = nir_type_invalid | 32,
                         .io_semantics.location = FRAG_RESULT_DATA0 + load_idx,
                         .io_semantics.num_slots = 1,
                         .io_semantics.fb_fetch_output = true);

   b->shader->info.outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0 + load_idx);
   b->shader->info.fs.uses_fbfetch_output = true;

   /* Z32_S8X24: depth and stencil live in separate words. */
   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
      if (merge_depth)
         return nir_vec2(b, nir_channel(b, src, 0), nir_channel(b, dst, 1));
      else
         return nir_vec2(b, nir_channel(b, dst, 0), nir_channel(b, src, 1));
   }

   /* Z24S8: depth in bits [0,24), stencil in bits [24,32). */
   if (merge_depth)
      mask = BITFIELD_MASK(24);
   else
      mask = BITFIELD_RANGE(24, 8);

   return nir_ior(b, nir_iand_imm(b, src, mask), nir_iand_imm(b, dst, ~mask));
}
/**
 * Packs a loaded (and converted/resolved) value into the output layout
 * required by the transfer format.
 *
 * Dispatches to integer packing, half/norm 2x16 or 4x8 packing, or a
 * depth/stencil merge depending on \p format; formats that do not need
 * packing are returned unchanged.
 *
 * \param b NIR builder.
 * \param next_sh In/out: next free dynamic-const shared register.
 * \param sh_reg_layout Shared register layout.
 * \param pick_component Whether to select a single byte component
 *        (integer formats only).
 * \param src Value to pack.
 * \param format The transfer pixel format.
 * \param load_idx Render-target index for depth/stencil merges.
 * \return The packed value.
 */
static nir_def *
pvr_uscgen_tq_frag_pack(nir_builder *b,
                        unsigned *next_sh,
                        struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
                        bool pick_component,
                        nir_def *src,
                        enum pvr_transfer_pbe_pixel_src format,
                        unsigned load_idx)
{
   if (!needs_packing(format))
      return src;

   /* Integer packing */
   switch (format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
      return pack_int_value(b,
                            next_sh,
                            sh_reg_layout,
                            pick_component,
                            src,
                            format);

   /* Four half-float channels packed into two words. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
      return nir_vec2(b,
                      nir_pack_half_2x16(b, nir_channels(b, src, 0x3)),
                      nir_pack_half_2x16(b, nir_channels(b, src, 0xc)));

   case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
      return nir_vec2(b,
                      nir_pack_unorm_2x16(b, nir_channels(b, src, 0x3)),
                      nir_pack_unorm_2x16(b, nir_channels(b, src, 0xc)));

   case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
      return nir_vec2(b,
                      nir_pack_snorm_2x16(b, nir_channels(b, src, 0x3)),
                      nir_pack_snorm_2x16(b, nir_channels(b, src, 0xc)));

   case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
      return nir_pack_unorm_4x8(b, src);

   /* Stencil merges into Z32_S8X24 destinations. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
      return merge_depth_stencil(b,
                                 src,
                                 PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
                                 false,
                                 load_idx);

   /* Depth merge into a Z32_S8X24 destination. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
      return merge_depth_stencil(b,
                                 src,
                                 PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
                                 true,
                                 load_idx);

   /* Stencil merges into Z24S8 destinations. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
      return merge_depth_stencil(b,
                                 src,
                                 PIPE_FORMAT_Z24_UNORM_S8_UINT,
                                 false,
                                 load_idx);

   /* Depth merges into Z24S8 destinations. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
      return merge_depth_stencil(b,
                                 src,
                                 PIPE_FORMAT_Z24_UNORM_S8_UINT,
                                 true,
                                 load_idx);

   default:
      UNREACHABLE("Unimplemented pvr_transfer_pbe_pixel_src");
   }
}
/**
 * Whether min/max sample resolution for the given transfer format
 * operates on integer (rather than float) values.
 *
 * \param format The transfer pixel format.
 * \return true for integer resolves, false for float resolves.
 */
static bool uses_int_resolve(enum pvr_transfer_pbe_pixel_src format)
{
   switch (format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
   case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
      return false;

   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
      return true;

   default:
      /* UNREACHABLE() does not return, so no trailing return is
       * needed (the original had a dead "return false;" here). */
      UNREACHABLE("Unsupported pvr_transfer_pbe_pixel_src");
   }
}
/**
 * Rewrites the per-sample values in place so they are suitable for the
 * requested resolve operation.
 *
 * For min/max resolves only D24S8 depth merges need preparation (the
 * stencil bits are masked out). For blend resolves, depth/stencil
 * formats are reduced to a single f32 component and other formats are
 * trimmed to the number of components that will be blended.
 *
 * \param b NIR builder.
 * \param samples In/out array of per-sample values.
 * \param num_samples Number of entries in \p samples.
 * \param format The transfer pixel format.
 * \param resolve_op The resolve operation about to be applied.
 */
static void prepare_samples_for_resolve(nir_builder *b,
                                        nir_def **samples,
                                        unsigned num_samples,
                                        enum pvr_transfer_pbe_pixel_src format,
                                        enum pvr_resolve_op resolve_op)
{
   unsigned num_components;

   if (resolve_op == PVR_RESOLVE_MIN || resolve_op == PVR_RESOLVE_MAX) {
      if (format != PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8)
         return;

      /* Mask out the stencil component since it is in the significant bits */
      for (unsigned i = 0; i < num_samples; i++)
         samples[i] = nir_iand_imm(b, samples[i], BITFIELD_MASK(24));

      return;
   }

   assert(resolve_op == PVR_RESOLVE_BLEND);

   switch (format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
      /* Mask out depth and convert to f32 */
      for (unsigned i = 0; i < num_samples; i++) {
         samples[i] = nir_ushr_imm(b, samples[i], 24);
         samples[i] = nir_u2f32(b, nir_channel(b, samples[i], 0));
      }
      return;

   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
      /* Mask out stencil and convert to f32 */
      for (unsigned i = 0; i < num_samples; i++) {
         samples[i] = nir_iand_imm(b, samples[i], ~BITFIELD_RANGE(24, 8));
         samples[i] = nir_u2f32(b, nir_channel(b, samples[i], 0));
      }
      return;

   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
      num_components = 1;
      break;

   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
      num_components = 2;
      break;

   default:
      /* Only normalized formats are expected to blend all 4 channels. */
      assert(pvr_pbe_pixel_is_norm(format));
      num_components = 4;
      break;
   }

   for (unsigned i = 0; i < num_samples; i++)
      samples[i] = nir_trim_vector(b, samples[i], num_components);
}
/**
 * Undoes the f32 conversion applied by prepare_samples_for_resolve()
 * after a blend resolve, returning the value to its packed form.
 *
 * Min/max resolves (and formats that were not converted) pass through
 * unchanged.
 *
 * \param b NIR builder.
 * \param src Resolved value.
 * \param format The transfer pixel format.
 * \param resolve_op The resolve operation that was applied.
 * \return The post-processed value.
 */
static nir_def *post_process_resolve(nir_builder *b,
                                     nir_def *src,
                                     enum pvr_transfer_pbe_pixel_src format,
                                     enum pvr_resolve_op resolve_op)
{
   unsigned bits;

   if (resolve_op != PVR_RESOLVE_BLEND)
      return src;

   switch (format) {
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
      /* Convert back to unorm and shift back to correct place */
      bits = 8;
      assert(src->num_components == 1);
      src = nir_format_float_to_unorm(b, src, &bits);
      return nir_ishl_imm(b, src, 24);

   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
      /* Convert back to unorm */
      bits = 24;
      assert(src->num_components == 1);
      return nir_format_float_to_unorm(b, src, &bits);

   default:
      break;
   }

   return src;
}
/**
 * Combines the per-sample values of a multisampled load into a single
 * resolved value.
 *
 * Fixes relative to the previous version: the default-case
 * UNREACHABLE() message named pvr_transfer_pbe_pixel_src although the
 * switch is over enum pvr_resolve_op, and the BLEND case assigned
 * nir_op_ffma to `op` even though the blend path never reads it.
 *
 * \param b NIR builder.
 * \param samples Per-sample values; may be rewritten in place by
 *        prepare_samples_for_resolve().
 * \param num_samples Number of entries in \p samples.
 * \param format The transfer pixel format of the samples.
 * \param resolve_op How to combine the samples (blend, min or max).
 * \return The resolved value.
 */
static nir_def *resolve_samples(nir_builder *b,
                                nir_def **samples,
                                unsigned num_samples,
                                enum pvr_transfer_pbe_pixel_src format,
                                enum pvr_resolve_op resolve_op)
{
   nir_def *accum;

   prepare_samples_for_resolve(b, samples, num_samples, format, resolve_op);

   if (resolve_op == PVR_RESOLVE_BLEND) {
      /* Equal-weight average: accum = sum(samples[i] * 1/num_samples). */
      nir_def *coeff = nir_imm_float(b, 1.0 / num_samples);

      accum = nir_fmul(b, samples[0], coeff);
      for (unsigned i = 1; i < num_samples; i++)
         accum = nir_ffma(b, samples[i], coeff, accum);
   } else {
      nir_op op;

      switch (resolve_op) {
      case PVR_RESOLVE_MIN:
         op = uses_int_resolve(format) ? nir_op_imin : nir_op_fmin;
         break;

      case PVR_RESOLVE_MAX:
         op = uses_int_resolve(format) ? nir_op_imax : nir_op_fmax;
         break;

      default:
         UNREACHABLE("Unsupported pvr_resolve_op");
      }

      accum = samples[0];
      for (unsigned i = 1; i < num_samples; i++)
         accum = nir_build_alu2(b, op, samples[i], accum);
   }

   return post_process_resolve(b, accum, format, resolve_op);
}
/**
 * Converts a loaded texel value between depth/stencil representations
 * as required by the transfer format.
 *
 * Formats that need no conversion (see needs_conversion()) are
 * returned unchanged.
 *
 * \param b NIR builder.
 * \param src Loaded value.
 * \param format The transfer pixel format.
 * \return The converted value.
 */
static nir_def *pvr_uscgen_tq_frag_conv(nir_builder *b,
                                        nir_def *src,
                                        enum pvr_transfer_pbe_pixel_src format)
{
   unsigned bits;

   switch (format) {
   /* D24 unorm -> D32 float. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
      bits = 32;
      return nir_format_unorm_to_float(
         b,
         nir_iand_imm(b, nir_channel(b, src, 0), BITFIELD_MASK(24)),
         &bits);

   /* D32 uint -> D32 float. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
      bits = 32;
      return nir_format_unorm_to_float(b, nir_channel(b, src, 0), &bits);

   /* D32 float -> D24 unorm. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
      bits = 24;
      return nir_format_float_to_unorm(b, nir_channel(b, src, 0), &bits);

   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
      return nir_ushr_imm(b, nir_channel(b, src, 0), 8);

   /* Extract stencil into the second word; depth is undefined. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
      return nir_vec2(b,
                      nir_undef(b, 1, 32),
                      nir_ushr_imm(b, nir_channel(b, src, 0), 24));

   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
      return nir_ushr_imm(b, nir_channel(b, src, 0), 24);

   /* Rotate S8D24 into D24S8 component order. */
   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
      src = nir_channel(b, src, 0);
      return nir_mask_shift_or(b,
                               nir_ushr_imm(b, src, 24),
                               src,
                               BITFIELD_MASK(24),
                               8);

   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
      return nir_vec2(b,
                      nir_undef(b, 1, 32),
                      nir_ushr_imm(b, nir_channel(b, src, 0), 24));

   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
      return nir_vec2(b, nir_undef(b, 1, 32), nir_channel(b, src, 0));

   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
      return nir_ishl_imm(b, nir_channel(b, src, 0), 24);

   default:
      assert(!needs_conversion(format));
   }

   return src;
}
/**
 * Emits the texture sample(s) for one transfer load and resolves them
 * to a single value when multisampled.
 *
 * One sample per shader invocation is emitted for single-sampled or
 * full-rate shaders; otherwise every sample of the layer is fetched
 * and combined with resolve_samples().
 *
 * \param b NIR builder.
 * \param load_idx Index into the combined image/sampler table.
 * \param coords Texture coordinates (see pvr_uscgen_tq_frag_coords()).
 * \param shader_props Transfer shader properties.
 * \param sh_reg_layout Shared register layout.
 * \return The loaded (and possibly resolved) value.
 */
static nir_def *
pvr_uscgen_tq_frag_load(nir_builder *b,
                        uint32_t load_idx,
                        nir_def *coords,
                        const struct pvr_tq_shader_properties *shader_props,
                        struct pvr_tq_frag_sh_reg_layout *sh_reg_layout)
{
   const struct pvr_tq_layer_properties *layer_props =
      &shader_props->layer_props;

   const unsigned num_samples = (shader_props->full_rate || !layer_props->msaa)
                                   ? 1
                                   : layer_props->sample_count;

   nir_def *samples[PVR_MAX_SAMPLE_COUNT];

   for (unsigned sample_idx = 0; sample_idx < num_samples; sample_idx++) {
      assert(load_idx < sh_reg_layout->combined_image_samplers.count);

      /* Texture and sampler state words come from shared registers. */
      nir_def *tex_state = nir_load_preamble(
         b,
         4,
         32,
         .base =
            sh_reg_layout->combined_image_samplers.offsets[load_idx].image);

      nir_def *smp_state = nir_load_preamble(
         b,
         4,
         32,
         .base =
            sh_reg_layout->combined_image_samplers.offsets[load_idx].sampler);

      pco_smp_params params = {
         .tex_state = tex_state,
         .smp_state = smp_state,

         .dest_type = pvr_pbe_pixel_is_norm(layer_props->pbe_format)
                         ? nir_type_float32
                         : nir_type_uint32,

         .nncoords = shader_props->layer_props.linear ||
                     !shader_props->iterated,
         .coords = coords,
      };

      /* Pick which sample to fetch. */
      if (layer_props->msaa) {
         if (shader_props->full_rate) {
            /* One invocation per sample: use the current sample id. */
            params.ms_index = nir_load_sample_id(b);
            b->shader->info.fs.uses_sample_shading = true;
         } else if (layer_props->resolve_op >= PVR_RESOLVE_SAMPLE0) {
            /* Resolve to a single fixed sample. */
            params.ms_index =
               nir_imm_int(b, layer_props->resolve_op - PVR_RESOLVE_SAMPLE0);
         } else {
            params.ms_index = nir_imm_int(b, sample_idx);
         }
      }

      params.sampler_dim = GLSL_SAMPLER_DIM_2D;
      if (layer_props->msaa)
         params.sampler_dim = GLSL_SAMPLER_DIM_MS;
      else if (layer_props->sample)
         params.sampler_dim = GLSL_SAMPLER_DIM_3D;

      nir_intrinsic_instr *smp = pco_emit_nir_smp(b, &params);
      samples[sample_idx] = &smp->def;
   }

   if (num_samples == 1)
      return samples[0];

   return resolve_samples(b,
                          samples,
                          num_samples,
                          layer_props->pbe_format,
                          layer_props->resolve_op);
}
/**
 * Builds the texture coordinates for the transfer fragment shader.
 *
 * Coordinates come either from an iterated varying (VAR0) or from the
 * fragment position, optionally scaled/biased by values in shared
 * registers; a depth coordinate is appended for 3D sampling.
 *
 * \param b NIR builder.
 * \param next_sh In/out: next free dynamic-const shared register;
 *        advanced by however many registers are consumed here.
 * \param shader_props Transfer shader properties.
 * \param sh_reg_layout Shared register layout.
 * \return A 2- or 3-component coordinate vector.
 */
static nir_def *
pvr_uscgen_tq_frag_coords(nir_builder *b,
                          unsigned *next_sh,
                          const struct pvr_tq_shader_properties *shader_props,
                          struct pvr_tq_frag_sh_reg_layout *sh_reg_layout)
{
   const struct pvr_tq_layer_properties *layer_props =
      &shader_props->layer_props;
   unsigned base_sh = sh_reg_layout->dynamic_consts.offset;

   bool varying = shader_props->iterated;
   unsigned location = varying ? VARYING_SLOT_VAR0 : VARYING_SLOT_POS;
   unsigned pos_chans = varying ? (layer_props->sample ? 3 : 2) : 4;
   const struct glsl_type *var_type = glsl_vec_type(pos_chans);

   nir_variable *pos = nir_get_variable_with_location(b->shader,
                                                      nir_var_shader_in,
                                                      location,
                                                      var_type);

   nir_def *coords_var = nir_load_var(b, pos);
   nir_def *coords = nir_channels(b, coords_var, nir_component_mask(2));

   assert(layer_props->layer_floats != PVR_INT_COORD_SET_FLOATS_6);

   if (!varying && layer_props->layer_floats == PVR_INT_COORD_SET_FLOATS_4) {
      /* coords.xy = coords.xy * (sh[0], sh[2]) + (sh[1], sh[3]) */
      nir_def *mult =
         nir_vec2(b,
                  nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh),
                  nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 2));

      nir_def *add =
         nir_vec2(b,
                  nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 1),
                  nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 3));

      coords = nir_fmad(b, coords, mult, add);
      *next_sh += 4;
   }

   /* 3D texture, the depth comes from shared regs, or is iterated */
   if (layer_props->sample) {
      nir_def *depth =
         varying ? nir_channel(b, coords_var, 2)
                 : nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh);

      coords = nir_pad_vector(b, coords, 3);
      coords = nir_vector_insert_imm(b, coords, depth, 2);
      (*next_sh)++;
   }

   return coords;
}
/**
 * Generates a transfer-queue fragment shader: computes coordinates,
 * samples the source layer, converts/packs the value for the output
 * format, and stores it to FRAG_RESULT_DATA0.
 *
 * Also finalizes the dynamic-const portion of \p sh_reg_layout based
 * on the shared registers consumed while building the shader.
 *
 * \param ctx PCO compiler context.
 * \param shader_props Transfer shader properties.
 * \param sh_reg_layout In/out shared register layout.
 * \return The compiled shader.
 */
pco_shader *pvr_uscgen_tq(pco_ctx *ctx,
                          const struct pvr_tq_shader_properties *shader_props,
                          struct pvr_tq_frag_sh_reg_layout *sh_reg_layout)
{
   const struct pvr_tq_layer_properties *layer_props =
      &shader_props->layer_props;
   unsigned next_sh = 0;
   unsigned pixel_size = pvr_pbe_pixel_size(layer_props->pbe_format);

   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                                  pco_nir_options(),
                                                  "TQ");

   /* Unsupported combinations in this generator. */
   assert(layer_props->layer_floats != PVR_INT_COORD_SET_FLOATS_6);
   assert(layer_props->byte_unwind == 0);
   assert(layer_props->linear == false);
   assert(pvr_pbe_pixel_num_loads(layer_props->pbe_format) == 1);

   /* Output is pixel_size x 32-bit uint words. */
   pco_data data = { 0 };
   switch (pixel_size) {
   case 1:
      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32_UINT;
      break;

   case 2:
      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32G32_UINT;
      break;

   case 3:
      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32G32B32_UINT;
      break;

   case 4:
      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32G32B32A32_UINT;
      break;

   default:
      UNREACHABLE("");
   }

   data.fs.outputs[FRAG_RESULT_DATA0] = (pco_range){
      .start = 0,
      .count = pixel_size,
   };

   data.fs.output_reg[FRAG_RESULT_DATA0] = true;

   /* Load, convert and pack the texel. */
   nir_def *loaded_data;
   nir_def *coords =
      pvr_uscgen_tq_frag_coords(&b, &next_sh, shader_props, sh_reg_layout);

   assert(!layer_props->linear);
   loaded_data =
      pvr_uscgen_tq_frag_load(&b, 0, coords, shader_props, sh_reg_layout);

   loaded_data =
      pvr_uscgen_tq_frag_conv(&b, loaded_data, layer_props->pbe_format);

   loaded_data = pvr_uscgen_tq_frag_pack(&b,
                                         &next_sh,
                                         sh_reg_layout,
                                         shader_props->pick_component,
                                         loaded_data,
                                         layer_props->pbe_format,
                                         0);

   nir_store_output(&b,
                    nir_resize_vector(&b, loaded_data, pixel_size),
                    nir_imm_int(&b, 0),
                    .base = 0,
                    .src_type = nir_type_invalid | 32,
                    .write_mask = BITFIELD_MASK(pixel_size),
                    .io_semantics.location = FRAG_RESULT_DATA0,
                    .io_semantics.num_slots = 1);

   b.shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DATA0);

   /* Transfer coordinates must not be perspective-corrected. */
   nir_variable *pos = nir_find_variable_with_location(b.shader,
                                                       nir_var_shader_in,
                                                       VARYING_SLOT_POS);
   if (pos)
      pos->data.interpolation = INTERP_MODE_NOPERSPECTIVE;

   nir_variable *var0 = nir_find_variable_with_location(b.shader,
                                                        nir_var_shader_in,
                                                        VARYING_SLOT_VAR0);
   if (var0) {
      var0->data.interpolation = INTERP_MODE_NOPERSPECTIVE;

      /* TODO: port and use allocate_var from pvr_pipeline.c */
      data.fs.varyings[VARYING_SLOT_VAR0] = (pco_range){
         .start = 0,
         .count = glsl_count_dword_slots(var0->type, false)
                  * ROGUE_USC_COEFFICIENT_SET_SIZE,
      };
   }

   nir_create_variable_with_location(b.shader,
                                     nir_var_shader_out,
                                     FRAG_RESULT_DATA0,
                                     glsl_uvec_type(pixel_size));

   /* Account for the dynamic consts consumed while building. */
   sh_reg_layout->dynamic_consts.count = next_sh;
   sh_reg_layout->driver_total += sh_reg_layout->dynamic_consts.count;
   sh_reg_layout->compiler_out_total = 0;
   sh_reg_layout->compiler_out.usc_constants.count = 0;

   nir_jump(&b, nir_jump_return);

   return build_shader(ctx, b.shader, &data);
}

View file

@ -15,6 +15,7 @@
#include "compiler/shader_enums.h"
#include "pco/pco.h"
#include "usc/pvr_uscgen.h"
/* NOP shader generation. */
pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage);
@ -31,4 +32,8 @@ struct pvr_tq_props {
pco_shader *pvr_usc_tq(pco_ctx *ctx, struct pvr_tq_props *props);
/** Generates a transfer-queue fragment shader; finalizes the
 * dynamic-const portion of *sh_reg_layout as a side effect.
 */
pco_shader *pvr_uscgen_tq(pco_ctx *ctx,
                          const struct pvr_tq_shader_properties *shader_props,
                          struct pvr_tq_frag_sh_reg_layout *sh_reg_layout);
#endif /* PVR_USC_H */