diff --git a/src/imagination/pco/pco.h b/src/imagination/pco/pco.h
index bc3d7c132ac..ed915c18c47 100644
--- a/src/imagination/pco/pco.h
+++ b/src/imagination/pco/pco.h
@@ -53,4 +53,41 @@
 void pco_validate_shader(pco_shader *shader, const char *when);
 void pco_print_shader(pco_shader *shader, FILE *fp, const char *when);
 void pco_print_binary(pco_shader *shader, FILE *fp, const char *when);
+
+#include "compiler/nir/nir_builder.h"
+
+typedef struct _pco_smp_params {
+   nir_def *tex_state;
+   nir_def *smp_state;
+
+   nir_alu_type dest_type;
+
+   enum glsl_sampler_dim sampler_dim;
+
+   bool nncoords;
+   nir_def *coords;
+   nir_def *array_index;
+
+   nir_def *proj;
+
+   nir_def *lod_bias;
+   nir_def *lod_replace;
+   nir_def *lod_ddx;
+   nir_def *lod_ddy;
+
+   nir_def *addr_lo;
+   nir_def *addr_hi;
+
+   nir_def *offset;
+   nir_def *ms_index;
+
+   nir_def *write_data;
+
+   bool sample_coeffs;
+   bool sample_raw;
+   unsigned sample_components;
+
+   bool int_mode;
+} pco_smp_params;
+nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b, pco_smp_params *params);
 #endif /* PCO_H */
diff --git a/src/imagination/pco/pco_binary.c b/src/imagination/pco/pco_binary.c
index faded9b8fed..aae9172ded2 100644
--- a/src/imagination/pco/pco_binary.c
+++ b/src/imagination/pco/pco_binary.c
@@ -137,6 +137,9 @@ void pco_encode_ir(pco_ctx *ctx, pco_shader *shader)
  */
 unsigned pco_shader_binary_size(pco_shader *shader)
 {
+   if (!shader)
+      return 0;
+
    return shader->binary.size;
 }
 
@@ -148,5 +151,7 @@ unsigned pco_shader_binary_size(pco_shader *shader)
  */
 const void *pco_shader_binary_data(pco_shader *shader)
 {
+   if (!shader)
+      return NULL;
    return shader->binary.data;
 }
diff --git a/src/imagination/pco/pco_nir_tex.c b/src/imagination/pco/pco_nir_tex.c
index a2be5017455..59904502a5a 100644
--- a/src/imagination/pco/pco_nir_tex.c
+++ b/src/imagination/pco/pco_nir_tex.c
@@ -226,42 +226,7 @@ static inline void unpack_base_addr(nir_builder *b,
    *base_addr_hi = STATE_UNPACK(b, tex_state_word, 3, 14, 8);
 }
 
-typedef struct _pco_smp_params {
-   nir_def *tex_state;
-   nir_def *smp_state;
-
-   nir_alu_type dest_type;
-
-   enum glsl_sampler_dim sampler_dim;
-
-   bool nncoords;
-   nir_def *coords;
-   nir_def *array_index;
-
-   nir_def *proj;
-
-   nir_def *lod_bias;
-   nir_def *lod_replace;
-   nir_def *lod_ddx;
-   nir_def *lod_ddy;
-
-   nir_def *addr_lo;
-   nir_def *addr_hi;
-
-   nir_def *offset;
-   nir_def *ms_index;
-
-   nir_def *write_data;
-
-   bool sample_coeffs;
-   bool sample_raw;
-   unsigned sample_components;
-
-   bool int_mode;
-} pco_smp_params;
-
-static nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b,
-                                             pco_smp_params *params)
+nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b, pco_smp_params *params)
 {
    nir_def *comps[NIR_MAX_VEC_COMPONENTS];
    unsigned count = 0;
 
@@ -427,9 +392,11 @@ static nir_intrinsic_instr *pco_emit_nir_smp(nir_builder *b,
    assert(!params->sample_coeffs);
    assert(!params->sample_raw);
-   assert(params->sample_components > 0);
    assert(!params->write_data);
 
+   if (!params->sample_components)
+      params->sample_components = 4;
+
    nir_def *def = nir_smp_pco(b,
                               params->sample_components,
                               smp_data,
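/* Editor's note -- illustrative sketch, not part of the patch: with the new
 * NULL guards in pco_shader_binary_size()/pco_shader_binary_data(), callers
 * can probe a possibly-failed compile without a separate NULL check. The
 * helper name is hypothetical. */
static bool pco_binary_is_ready(pco_shader *shader)
{
   /* Both accessors now return 0 / NULL for a NULL shader. */
   return pco_shader_binary_data(shader) != NULL &&
          pco_shader_binary_size(shader) > 0;
}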
diff --git a/src/imagination/pco/pco_ra.c b/src/imagination/pco/pco_ra.c
index 205cb5ad4de..13c0bce642f 100644
--- a/src/imagination/pco/pco_ra.c
+++ b/src/imagination/pco/pco_ra.c
@@ -472,8 +472,14 @@ static bool pco_ra_func(pco_func *func,
          pco_ref_xfer_mods(&src, psrc, false);
 
-         if (!pco_refs_are_equal(src, dest, true))
-            pco_mbyp(&b, dest, src, .exec_cnd = exec_cnd);
+         if (!pco_refs_are_equal(src, dest, true)) {
+            if (pco_ref_is_reg(src) &&
+                pco_ref_get_reg_class(src) == PCO_REG_CLASS_SPEC) {
+               pco_movs1(&b, dest, src, .exec_cnd = exec_cnd);
+            } else {
+               pco_mbyp(&b, dest, src, .exec_cnd = exec_cnd);
+            }
+         }
       }
 
       temps = MAX2(temps, temp_dest_base + offset + chans);
diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c
index 58b93a4c25a..c217f644a59 100644
--- a/src/imagination/pco/pco_trans_nir.c
+++ b/src/imagination/pco/pco_trans_nir.c
@@ -1166,6 +1166,15 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
       instr = trans_load_output_fs(tctx, intr, dest);
       break;
 
+   case nir_intrinsic_load_preamble:
+      instr = pco_mov(&tctx->b,
+                      dest,
+                      pco_ref_hwreg_vec(nir_intrinsic_base(intr),
+                                        PCO_REG_CLASS_SHARED,
+                                        pco_ref_get_chans(dest)),
+                      .rpt = pco_ref_get_chans(dest));
+      break;
+
    case nir_intrinsic_load_push_constant:
       instr =
          trans_load_common_store(tctx,
diff --git a/src/imagination/vulkan/pvr_transfer_frag_store.c b/src/imagination/vulkan/pvr_transfer_frag_store.c
index 95e5721b55a..ca9e4c4712a 100644
--- a/src/imagination/vulkan/pvr_transfer_frag_store.c
+++ b/src/imagination/vulkan/pvr_transfer_frag_store.c
@@ -35,6 +35,7 @@
 #include "pvr_private.h"
 #include "pvr_transfer_frag_store.h"
 #include "pvr_types.h"
+#include "pvr_usc.h"
 #include "usc/pvr_uscgen.h"
 #include "util/hash_table.h"
 #include "util/macros.h"
@@ -187,7 +188,6 @@ static VkResult pvr_transfer_frag_store_entry_data_compile(
    struct pvr_tq_frag_sh_reg_layout *sh_reg_layout =
       &entry_data->sh_reg_layout;
    uint32_t next_free_sh_reg = 0;
-   struct util_dynarray shader;
    VkResult result;
 
    /* TODO: Allocate all combined image samplers if needed? Otherwise change the
@@ -208,17 +208,19 @@ static VkResult pvr_transfer_frag_store_entry_data_compile(
 
    sh_reg_layout->driver_total = next_free_sh_reg;
 
-   pvr_uscgen_tq_frag(shader_props,
-                      &entry_data->sh_reg_layout,
-                      num_usc_temps_out,
-                      &shader);
+   pco_shader *tq =
+      pvr_uscgen_tq(device->pdevice->pco_ctx, shader_props, sh_reg_layout);
+
+   *num_usc_temps_out = pco_shader_data(tq)->common.temps;
 
    result = pvr_gpu_upload_usc(device,
-                               util_dynarray_begin(&shader),
-                               util_dynarray_num_elements(&shader, uint8_t),
+                               pco_shader_binary_data(tq),
+                               pco_shader_binary_size(tq),
                                cache_line_size,
                                &entry_data->usc_upload);
-   util_dynarray_fini(&shader);
+
+   ralloc_free(tq);
+
    if (result != VK_SUCCESS)
       return result;
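/* Editor's note -- illustrative, not part of the patch: the new
 * nir_intrinsic_load_preamble translation above turns `.base = N` into a
 * single repeated mov from the shared-register file (sh[N]..sh[N+chans-1]),
 * so driver-generated shaders can read values the driver preloaded into
 * shared registers. Register numbers here are made up. */
static nir_def *demo_load_shared(nir_builder *b)
{
   /* One 32-bit constant from sh[4] (unused here, shown for the scalar case). */
   nir_def *scale = nir_load_preamble(b, 1, 32, .base = 4);
   (void)scale;

   /* A 4-dword texture state word from sh[8..11]; becomes a mov with .rpt = 4. */
   return nir_load_preamble(b, 4, 32, .base = 8);
}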
diff --git a/src/imagination/vulkan/pvr_usc.c b/src/imagination/vulkan/pvr_usc.c
index 99d1ddacbf2..48d5c60efe0 100644
--- a/src/imagination/vulkan/pvr_usc.c
+++ b/src/imagination/vulkan/pvr_usc.c
@@ -12,12 +12,19 @@
 
 #include "nir/nir.h"
 #include "nir/nir_builder.h"
+#include "nir/nir_format_convert.h"
+#include "nir/nir_conversion_builder.h"
 #include "pco/pco.h"
 #include "pco/pco_data.h"
 #include "pco_uscgen_programs.h"
+#include "pvr_common.h"
+#include "pvr_formats.h"
 #include "pvr_usc.h"
+#include "usc/pvr_uscgen.h"
 #include "util/macros.h"
 
+#define PVR_MAX_SAMPLE_COUNT 8
+
 /**
  * Common function to build a NIR shader and export the binary.
  *
@@ -84,3 +91,804 @@ pco_shader *pvr_usc_tq(pco_ctx *ctx, struct pvr_tq_props *props)
 {
    UNREACHABLE("finishme: pvr_usc_tq");
 }
+
+static bool needs_packing(enum pvr_transfer_pbe_pixel_src format)
+{
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S8D24:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW128:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X4:
+      return false;
+   default:
+      break;
+   }
+   return true;
+}
+
+static bool needs_conversion(enum pvr_transfer_pbe_pixel_src format)
+{
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_UV_INTERLEAVED:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_YVU_PACKED:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_Y_U_V:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_YUV_PACKED:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
+      return true;
+   default:
+      break;
+   }
+   return false;
+}
+
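/* Editor's note -- not part of the patch: these predicates partition the PBE
 * formats for the pipeline built below (sample -> resolve -> conv -> pack ->
 * store), and a format can take either branch independently: */
static void demo_format_partition(void)
{
   /* Raw copies are neither converted nor repacked... */
   assert(!needs_conversion(PVR_TRANSFER_PBE_PIXEL_SRC_RAW32));
   assert(!needs_packing(PVR_TRANSFER_PBE_PIXEL_SRC_RAW32));

   /* ...while e.g. D32 -> D24S8 is converted but stored without repacking. */
   assert(needs_conversion(PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8));
   assert(!needs_packing(PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8));
}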
+static void
+int_format_signs(enum pvr_transfer_pbe_pixel_src format, bool *src, bool *dst)
+{
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
+      *src = false;
+      break;
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
+      *src = true;
+      break;
+   default:
+      UNREACHABLE("Invalid format");
+   }
+
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
+      *dst = false;
+      break;
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
+      *dst = true;
+      break;
+   default:
+      UNREACHABLE("Invalid format");
+   }
+}
+
+static nir_def *
+picked_component(nir_builder *b,
+                 nir_def *src,
+                 unsigned *next_sh,
+                 struct pvr_tq_frag_sh_reg_layout *sh_reg_layout)
+{
+   unsigned base_sh = sh_reg_layout->dynamic_consts.offset;
+   nir_variable *pos = nir_get_variable_with_location(b->shader,
+                                                      nir_var_shader_in,
+                                                      VARYING_SLOT_POS,
+                                                      glsl_vec4_type());
+   nir_def *coord_x = nir_f2i32(b, nir_channel(b, nir_load_var(b, pos), 0));
+   nir_def *mask = nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh);
+   nir_def *offset =
+      nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 1);
+   nir_def *comp_idx = nir_iand(b, nir_isub(b, coord_x, offset), mask);
+   nir_def *shift_val = nir_imul_imm(b, comp_idx, 8);
+
+   *next_sh += 2;
+   return nir_ushr(b, src, shift_val);
+}
+
+static nir_def *pack_int_value(nir_builder *b,
+                               unsigned *next_sh,
+                               struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+                               bool pick_component,
+                               nir_def *src,
+                               enum pvr_transfer_pbe_pixel_src format)
+{
+   unsigned src_num_components = 4;
+   const unsigned bits_8[] = { 8, 8, 8, 8 };
+   const unsigned bits_10[] = { 10, 10, 10, 2 };
+   const unsigned bits_16[] = { 16, 16, 16, 16 };
+   const unsigned bits_32[] = { 32, 32, 32, 32 };
+   const unsigned *bits;
+   bool src_signed, dst_signed;
+   int_format_signs(format, &src_signed, &dst_signed);
+
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
+      bits = bits_8;
+      break;
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
+      bits = bits_16;
+      break;
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
+      bits = bits_32;
+      break;
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
+      bits = bits_10;
+      break;
+   default:
+      UNREACHABLE("Invalid format");
+   }
+
+   if (format == PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32 ||
+       format == PVR_TRANSFER_PBE_PIXEL_SRC_US32S32) {
+      src_num_components = 2;
+   }
+
+   if (format == PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102 ||
+       format == PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102) {
+      unsigned swiz[] = { 2, 1, 0, 3 };
+      src = nir_swizzle(b, src, swiz, 4);
+   }
+
+   if (src_signed != dst_signed) {
+      src = nir_convert_with_rounding(b,
+                                      src,
+                                      src_signed ? nir_type_int : nir_type_uint,
+                                      dst_signed ? nir_type_int32
+                                                 : nir_type_uint32,
+                                      nir_rounding_mode_undef,
+                                      true);
+   }
+
+   if (dst_signed)
+      src = nir_format_clamp_sint(b, src, bits);
+   else
+      src = nir_format_clamp_uint(b, src, bits);
+
+   if ((bits[0] < 32) && dst_signed)
+      src = nir_format_mask_uvec(b, src, bits);
+
+   if (bits != bits_16) {
+      src = nir_format_pack_uint(b, src, bits, src_num_components);
+   } else {
+      src =
+         nir_vec2(b,
+                  nir_format_pack_uint(b, nir_channels(b, src, 0x3), bits, 2),
+                  nir_format_pack_uint(b, nir_channels(b, src, 0xc), bits, 2));
+   }
+
+   if (!pick_component)
+      return src;
+
+   return picked_component(b, src, next_sh, sh_reg_layout);
+}
+
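/* Editor's note -- reading the format names, as encoded by int_format_signs()
 * above: the first letter gives the source signedness and the second the
 * destination (so SU8888 samples a signed source and clamps it to unsigned
 * 8:8:8:8). For the 1010102 layouts, nir_format_pack_uint() then produces,
 * schematically:
 *
 *    packed = (r & 0x3ff) | (g & 0x3ff) << 10 | (b & 0x3ff) << 20
 *           | (a & 0x3) << 30;
 */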
+static nir_def *merge_depth_stencil(nir_builder *b,
+                                    nir_def *src,
+                                    enum pipe_format format,
+                                    bool merge_depth,
+                                    unsigned load_idx)
+{
+   nir_def *dst;
+   unsigned mask;
+
+   assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
+          format == PIPE_FORMAT_Z24_UNORM_S8_UINT);
+
+   dst = nir_load_output(b,
+                         format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ? 2 : 1,
+                         32,
+                         nir_imm_int(b, 0),
+                         .base = 0,
+                         .dest_type = nir_type_invalid | 32,
+                         .io_semantics.location = FRAG_RESULT_DATA0 + load_idx,
+                         .io_semantics.num_slots = 1,
+                         .io_semantics.fb_fetch_output = true);
+
+   b->shader->info.outputs_read |= BITFIELD64_BIT(FRAG_RESULT_DATA0 + load_idx);
+   b->shader->info.fs.uses_fbfetch_output = true;
+
+   if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+      if (merge_depth)
+         return nir_vec2(b, nir_channel(b, src, 0), nir_channel(b, dst, 1));
+      else
+         return nir_vec2(b, nir_channel(b, dst, 0), nir_channel(b, src, 1));
+   }
+
+   if (merge_depth)
+      mask = BITFIELD_MASK(24);
+   else
+      mask = BITFIELD_RANGE(24, 8);
+
+   return nir_ior(b, nir_iand_imm(b, src, mask), nir_iand_imm(b, dst, ~mask));
+}
+
+static nir_def *
+pvr_uscgen_tq_frag_pack(nir_builder *b,
+                        unsigned *next_sh,
+                        struct pvr_tq_frag_sh_reg_layout *sh_reg_layout,
+                        bool pick_component,
+                        nir_def *src,
+                        enum pvr_transfer_pbe_pixel_src format,
+                        unsigned load_idx)
+{
+   if (!needs_packing(format))
+      return src;
+
+   /* Integer packing */
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS8888:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU16U16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SS16S16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU32U32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S4XU32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_US32S32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_U4XS32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_UU1010102:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RBSWAP_SU1010102:
+      return pack_int_value(b,
+                            next_sh,
+                            sh_reg_layout,
+                            pick_component,
+                            src,
+                            format);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
+      return nir_vec2(b,
+                      nir_pack_half_2x16(b, nir_channels(b, src, 0x3)),
+                      nir_pack_half_2x16(b, nir_channels(b, src, 0xc)));
+   case PVR_TRANSFER_PBE_PIXEL_SRC_U16NORM:
+      return nir_vec2(b,
+                      nir_pack_unorm_2x16(b, nir_channels(b, src, 0x3)),
+                      nir_pack_unorm_2x16(b, nir_channels(b, src, 0xc)));
+   case PVR_TRANSFER_PBE_PIXEL_SRC_S16NORM:
+      return nir_vec2(b,
+                      nir_pack_snorm_2x16(b, nir_channels(b, src, 0x3)),
+                      nir_pack_snorm_2x16(b, nir_channels(b, src, 0xc)));
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
+      return nir_pack_unorm_4x8(b, src);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
+      return merge_depth_stencil(b,
+                                 src,
+                                 PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
+                                 false,
+                                 load_idx);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
+      return merge_depth_stencil(b,
+                                 src,
+                                 PIPE_FORMAT_Z32_FLOAT_S8X24_UINT,
+                                 true,
+                                 load_idx);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
+      return merge_depth_stencil(b,
+                                 src,
+                                 PIPE_FORMAT_Z24_UNORM_S8_UINT,
+                                 false,
+                                 load_idx);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
+      return merge_depth_stencil(b,
+                                 src,
+                                 PIPE_FORMAT_Z24_UNORM_S8_UINT,
+                                 true,
+                                 load_idx);
+   default:
+      UNREACHABLE("Unimplemented pvr_transfer_pbe_pixel_src");
+   }
+}
+
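/* Editor's note -- illustrative, not part of the patch: for
 * PIPE_FORMAT_Z24_UNORM_S8_UINT the merge above is plain bit surgery, with
 * depth in the low 24 bits and stencil in the top byte:
 *
 *    mask   = merge_depth ? 0x00ffffff : 0xff000000;
 *    merged = (src & mask) | (dst & ~mask);
 *
 * i.e. a depth-only blit keeps the destination's stencil byte, and a
 * stencil-only blit keeps the destination's depth bits. */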
+static bool uses_int_resolve(enum pvr_transfer_pbe_pixel_src format)
+{
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F16F16:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F16_U8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
+      return false;
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_RAW64:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D32S8_D32S8:
+      return true;
+   default:
+      UNREACHABLE("Unsupported pvr_transfer_pbe_pixel_src");
+   }
+   return false;
+}
+
+static void prepare_samples_for_resolve(nir_builder *b,
+                                        nir_def **samples,
+                                        unsigned num_samples,
+                                        enum pvr_transfer_pbe_pixel_src format,
+                                        enum pvr_resolve_op resolve_op)
+{
+   unsigned num_components;
+
+   if (resolve_op == PVR_RESOLVE_MIN || resolve_op == PVR_RESOLVE_MAX) {
+      if (format != PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8)
+         return;
+
+      /* Mask out the stencil component since it is in the most significant
+       * bits. */
+      for (unsigned i = 0; i < num_samples; i++)
+         samples[i] = nir_iand_imm(b, samples[i], BITFIELD_MASK(24));
+
+      return;
+   }
+
+   assert(resolve_op == PVR_RESOLVE_BLEND);
+
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
+      /* Mask out depth and convert to f32 */
+      for (unsigned i = 0; i < num_samples; i++) {
+         samples[i] = nir_ushr_imm(b, samples[i], 24);
+         samples[i] = nir_u2f32(b, nir_channel(b, samples[i], 0));
+      }
+      return;
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
+      /* Mask out stencil and convert to f32 */
+      for (unsigned i = 0; i < num_samples; i++) {
+         samples[i] = nir_iand_imm(b, samples[i], ~BITFIELD_RANGE(24, 8));
+         samples[i] = nir_u2f32(b, nir_channel(b, samples[i], 0));
+      }
+      return;
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32S8_D32S8:
+      num_components = 1;
+      break;
+   case PVR_TRANSFER_PBE_PIXEL_SRC_F32X2:
+      num_components = 2;
+      break;
+   default:
+      assert(pvr_pbe_pixel_is_norm(format));
+      num_components = 4;
+      break;
+   }
+
+   for (unsigned i = 0; i < num_samples; i++)
+      samples[i] = nir_trim_vector(b, samples[i], num_components);
+}
+
+static nir_def *post_process_resolve(nir_builder *b,
+                                     nir_def *src,
+                                     enum pvr_transfer_pbe_pixel_src format,
+                                     enum pvr_resolve_op resolve_op)
+{
+   unsigned bits;
+
+   if (resolve_op != PVR_RESOLVE_BLEND)
+      return src;
+
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
+      /* Convert back to unorm and shift back to correct place */
+      bits = 8;
+      assert(src->num_components == 1);
+      src = nir_format_float_to_unorm(b, src, &bits);
+      return nir_ishl_imm(b, src, 24);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D24S8_D24S8:
+      /* Convert back to unorm */
+      bits = 24;
+      assert(src->num_components == 1);
+      return nir_format_float_to_unorm(b, src, &bits);
+
+   default:
+      break;
+   }
+
+   return src;
+}
+
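/* Editor's note -- illustrative round trip, not part of the patch:
 * blend-resolving the depth half of a D24S8 surface (DMRG_D24S8_D24S8)
 * composes prepare_samples_for_resolve() above, the averaging loop in
 * resolve_samples() below, and post_process_resolve():
 *
 *    d_i = u2f32(sample_i & 0x00ffffff);    // prepare
 *    avg = (d_0 + ... + d_{N-1}) / N;       // resolve (ffma chain)
 *    out = float_to_unorm_24(avg);          // post-process
 */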
+static nir_def *resolve_samples(nir_builder *b,
+                                nir_def **samples,
+                                unsigned num_samples,
+                                enum pvr_transfer_pbe_pixel_src format,
+                                enum pvr_resolve_op resolve_op)
+{
+   nir_def *accum = NULL;
+   nir_def *coeff = NULL;
+   nir_op op;
+
+   switch (resolve_op) {
+   case PVR_RESOLVE_BLEND:
+      op = nir_op_ffma;
+      coeff = nir_imm_float(b, 1.0 / num_samples);
+      break;
+
+   case PVR_RESOLVE_MIN:
+      op = uses_int_resolve(format) ? nir_op_imin : nir_op_fmin;
+      break;
+
+   case PVR_RESOLVE_MAX:
+      op = uses_int_resolve(format) ? nir_op_imax : nir_op_fmax;
+      break;
+
+   default:
+      UNREACHABLE("Unsupported pvr_resolve_op");
+   }
+
+   prepare_samples_for_resolve(b, samples, num_samples, format, resolve_op);
+
+   if (resolve_op == PVR_RESOLVE_BLEND)
+      accum = nir_fmul(b, samples[0], coeff);
+   else
+      accum = samples[0];
+
+   for (unsigned i = 1; i < num_samples; i++) {
+      if (resolve_op == PVR_RESOLVE_BLEND)
+         accum = nir_ffma(b, samples[i], coeff, accum);
+      else
+         accum = nir_build_alu2(b, op, samples[i], accum);
+   }
+
+   return post_process_resolve(b, accum, format, resolve_op);
+}
+
+static nir_def *pvr_uscgen_tq_frag_conv(nir_builder *b,
+                                        nir_def *src,
+                                        enum pvr_transfer_pbe_pixel_src format)
+{
+   unsigned bits;
+   switch (format) {
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D24_D32:
+      bits = 32;
+      return nir_format_unorm_to_float(
+         b,
+         nir_iand_imm(b, nir_channel(b, src, 0), BITFIELD_MASK(24)),
+         &bits);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32U_D32F:
+      bits = 32;
+      return nir_format_unorm_to_float(b, nir_channel(b, src, 0), &bits);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_D32_D24S8:
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32_D24S8:
+      bits = 24;
+      return nir_format_float_to_unorm(b, nir_channel(b, src, 0), &bits);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_DMRG_D32U_D24S8:
+      return nir_ushr_imm(b, nir_channel(b, src, 0), 8);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_D24S8_D32S8:
+      return nir_vec2(b,
+                      nir_undef(b, 1, 32),
+                      nir_ushr_imm(b, nir_channel(b, src, 0), 24));
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SWAP_LMSB:
+      return nir_ushr_imm(b, nir_channel(b, src, 0), 24);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_CONV_S8D24_D24S8:
+      src = nir_channel(b, src, 0);
+      return nir_mask_shift_or(b,
+                               nir_ushr_imm(b, src, 24),
+                               src,
+                               BITFIELD_MASK(24),
+                               8);
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_MOV_BY45:
+      return nir_vec2(b,
+                      nir_undef(b, 1, 32),
+                      nir_ushr_imm(b, nir_channel(b, src, 0), 24));
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D32S8:
+      return nir_vec2(b, nir_undef(b, 1, 32), nir_channel(b, src, 0));
+
+   case PVR_TRANSFER_PBE_PIXEL_SRC_SMRG_S8_D24S8:
+      return nir_ishl_imm(b, nir_channel(b, src, 0), 24);
+
+   default:
+      assert(!needs_conversion(format));
+   }
+
+   return src;
+}
+
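/* Editor's note -- illustrative, not part of the patch: the
 * CONV_S8D24_D24S8 case above is a 32-bit rotate moving stencil from the top
 * byte to the bottom one, which is what the nir_mask_shift_or() call builds:
 *
 *    d24s8 = (s8d24 >> 24) | ((s8d24 & 0x00ffffff) << 8);
 */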
1 + : layer_props->sample_count; + + nir_def *samples[PVR_MAX_SAMPLE_COUNT]; + + for (unsigned sample_idx = 0; sample_idx < num_samples; sample_idx++) { + assert(load_idx < sh_reg_layout->combined_image_samplers.count); + + nir_def *tex_state = nir_load_preamble( + b, + 4, + 32, + .base = + sh_reg_layout->combined_image_samplers.offsets[load_idx].image); + + nir_def *smp_state = nir_load_preamble( + b, + 4, + 32, + .base = + sh_reg_layout->combined_image_samplers.offsets[load_idx].sampler); + + pco_smp_params params = { + .tex_state = tex_state, + .smp_state = smp_state, + + .dest_type = pvr_pbe_pixel_is_norm(layer_props->pbe_format) + ? nir_type_float32 + : nir_type_uint32, + + .nncoords = shader_props->layer_props.linear || + !shader_props->iterated, + .coords = coords, + }; + + if (layer_props->msaa) { + if (shader_props->full_rate) { + params.ms_index = nir_load_sample_id(b); + b->shader->info.fs.uses_sample_shading = true; + } else if (layer_props->resolve_op >= PVR_RESOLVE_SAMPLE0) { + params.ms_index = + nir_imm_int(b, layer_props->resolve_op - PVR_RESOLVE_SAMPLE0); + } else { + params.ms_index = nir_imm_int(b, sample_idx); + } + } + + params.sampler_dim = GLSL_SAMPLER_DIM_2D; + if (layer_props->msaa) + params.sampler_dim = GLSL_SAMPLER_DIM_MS; + else if (layer_props->sample) + params.sampler_dim = GLSL_SAMPLER_DIM_3D; + + nir_intrinsic_instr *smp = pco_emit_nir_smp(b, ¶ms); + samples[sample_idx] = &smp->def; + } + + if (num_samples == 1) + return samples[0]; + + return resolve_samples(b, + samples, + num_samples, + layer_props->pbe_format, + layer_props->resolve_op); +} + +static nir_def * +pvr_uscgen_tq_frag_coords(nir_builder *b, + unsigned *next_sh, + const struct pvr_tq_shader_properties *shader_props, + struct pvr_tq_frag_sh_reg_layout *sh_reg_layout) +{ + const struct pvr_tq_layer_properties *layer_props = + &shader_props->layer_props; + unsigned base_sh = sh_reg_layout->dynamic_consts.offset; + bool varying = shader_props->iterated; + unsigned location = varying ? VARYING_SLOT_VAR0 : VARYING_SLOT_POS; + unsigned pos_chans = varying ? (layer_props->sample ? 3 : 2) : 4; + + const struct glsl_type *var_type = glsl_vec_type(pos_chans); + nir_variable *pos = nir_get_variable_with_location(b->shader, + nir_var_shader_in, + location, + var_type); + nir_def *coords_var = nir_load_var(b, pos); + nir_def *coords = nir_channels(b, coords_var, nir_component_mask(2)); + + assert(layer_props->layer_floats != PVR_INT_COORD_SET_FLOATS_6); + if (!varying && layer_props->layer_floats == PVR_INT_COORD_SET_FLOATS_4) { + /* coords.xy = coords.xy * (sh[0], sh[2]) + (sh[1], s[3]) */ + nir_def *mult = + nir_vec2(b, + nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh), + nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 2)); + nir_def *add = + nir_vec2(b, + nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 1), + nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 3)); + coords = nir_fmad(b, coords, mult, add); + *next_sh += 4; + } + + /* 3D texture, the depth comes from shared regs, or is iterated */ + if (layer_props->sample) { + nir_def *depth = + varying ? 
+static nir_def *
+pvr_uscgen_tq_frag_coords(nir_builder *b,
+                          unsigned *next_sh,
+                          const struct pvr_tq_shader_properties *shader_props,
+                          struct pvr_tq_frag_sh_reg_layout *sh_reg_layout)
+{
+   const struct pvr_tq_layer_properties *layer_props =
+      &shader_props->layer_props;
+   unsigned base_sh = sh_reg_layout->dynamic_consts.offset;
+   bool varying = shader_props->iterated;
+   unsigned location = varying ? VARYING_SLOT_VAR0 : VARYING_SLOT_POS;
+   unsigned pos_chans = varying ? (layer_props->sample ? 3 : 2) : 4;
+
+   const struct glsl_type *var_type = glsl_vec_type(pos_chans);
+   nir_variable *pos = nir_get_variable_with_location(b->shader,
+                                                      nir_var_shader_in,
+                                                      location,
+                                                      var_type);
+   nir_def *coords_var = nir_load_var(b, pos);
+   nir_def *coords = nir_channels(b, coords_var, nir_component_mask(2));
+
+   assert(layer_props->layer_floats != PVR_INT_COORD_SET_FLOATS_6);
+   if (!varying && layer_props->layer_floats == PVR_INT_COORD_SET_FLOATS_4) {
+      /* coords.xy = coords.xy * (sh[0], sh[2]) + (sh[1], sh[3]) */
+      nir_def *mult =
+         nir_vec2(b,
+                  nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh),
+                  nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 2));
+      nir_def *add =
+         nir_vec2(b,
+                  nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 1),
+                  nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh + 3));
+      coords = nir_ffma(b, coords, mult, add);
+      *next_sh += 4;
+   }
+
+   /* 3D texture, the depth comes from shared regs, or is iterated */
+   if (layer_props->sample) {
+      nir_def *depth =
+         varying ? nir_channel(b, coords_var, 2)
+                 : nir_load_preamble(b, 1, 32, .base = *next_sh + base_sh);
+
+      coords = nir_pad_vector(b, coords, 3);
+      coords = nir_vector_insert_imm(b, coords, depth, 2);
+      (*next_sh)++;
+   }
+
+   return coords;
+}
+
+pco_shader *pvr_uscgen_tq(pco_ctx *ctx,
+                          const struct pvr_tq_shader_properties *shader_props,
+                          struct pvr_tq_frag_sh_reg_layout *sh_reg_layout)
+{
+   const struct pvr_tq_layer_properties *layer_props =
+      &shader_props->layer_props;
+   unsigned next_sh = 0;
+
+   unsigned pixel_size = pvr_pbe_pixel_size(layer_props->pbe_format);
+
+   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
+                                                  pco_nir_options(),
+                                                  "TQ");
+
+   assert(layer_props->layer_floats != PVR_INT_COORD_SET_FLOATS_6);
+   assert(layer_props->byte_unwind == 0);
+   assert(layer_props->linear == false);
+
+   assert(pvr_pbe_pixel_num_loads(layer_props->pbe_format) == 1);
+
+   pco_data data = { 0 };
+
+   switch (pixel_size) {
+   case 1:
+      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32_UINT;
+      break;
+
+   case 2:
+      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32G32_UINT;
+      break;
+
+   case 3:
+      data.fs.output_formats[FRAG_RESULT_DATA0] = PIPE_FORMAT_R32G32B32_UINT;
+      break;
+
+   case 4:
+      data.fs.output_formats[FRAG_RESULT_DATA0] =
+         PIPE_FORMAT_R32G32B32A32_UINT;
+      break;
+
+   default:
+      UNREACHABLE("");
+   }
+
+   data.fs.outputs[FRAG_RESULT_DATA0] = (pco_range){
+      .start = 0,
+      .count = pixel_size,
+   };
+   data.fs.output_reg[FRAG_RESULT_DATA0] = true;
+
+   nir_def *loaded_data;
+   nir_def *coords =
+      pvr_uscgen_tq_frag_coords(&b, &next_sh, shader_props, sh_reg_layout);
+
+   assert(!layer_props->linear);
+
+   loaded_data =
+      pvr_uscgen_tq_frag_load(&b, 0, coords, shader_props, sh_reg_layout);
+
+   loaded_data =
+      pvr_uscgen_tq_frag_conv(&b, loaded_data, layer_props->pbe_format);
+
+   loaded_data = pvr_uscgen_tq_frag_pack(&b,
+                                         &next_sh,
+                                         sh_reg_layout,
+                                         shader_props->pick_component,
+                                         loaded_data,
+                                         layer_props->pbe_format,
+                                         0);
+
+   nir_store_output(&b,
+                    nir_resize_vector(&b, loaded_data, pixel_size),
+                    nir_imm_int(&b, 0),
+                    .base = 0,
+                    .src_type = nir_type_invalid | 32,
+                    .write_mask = BITFIELD_MASK(pixel_size),
+                    .io_semantics.location = FRAG_RESULT_DATA0,
+                    .io_semantics.num_slots = 1);
+
+   b.shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DATA0);
+
+   nir_variable *pos = nir_find_variable_with_location(b.shader,
+                                                       nir_var_shader_in,
+                                                       VARYING_SLOT_POS);
+   if (pos)
+      pos->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
+
+   nir_variable *var0 = nir_find_variable_with_location(b.shader,
+                                                        nir_var_shader_in,
+                                                        VARYING_SLOT_VAR0);
+   if (var0) {
+      var0->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
+      /* TODO: port and use allocate_var from pvr_pipeline.c */
+      data.fs.varyings[VARYING_SLOT_VAR0] = (pco_range){
+         .start = 0,
+         .count = glsl_count_dword_slots(var0->type, false) *
+                  ROGUE_USC_COEFFICIENT_SET_SIZE,
+      };
+   }
+
+   nir_create_variable_with_location(b.shader,
+                                     nir_var_shader_out,
+                                     FRAG_RESULT_DATA0,
+                                     glsl_uvec_type(pixel_size));
+
+   sh_reg_layout->dynamic_consts.count = next_sh;
+   sh_reg_layout->driver_total += sh_reg_layout->dynamic_consts.count;
+   sh_reg_layout->compiler_out_total = 0;
+   sh_reg_layout->compiler_out.usc_constants.count = 0;
+
+   nir_jump(&b, nir_jump_return);
+
+   return build_shader(ctx, b.shader, &data);
+}
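/* Editor's note -- not part of the patch: layout of the dynamic-constant
 * shared registers consumed above, relative to dynamic_consts.offset. Only
 * the blocks a given shader variant uses are allocated, in this order:
 *
 *    +0..+3   coordinate scale/bias   (PVR_INT_COORD_SET_FLOATS_4 path)
 *    next     3D texture depth        (layer_props->sample, non-iterated)
 *    next 2   component mask/offset   (pick_component path)
 *
 * next_sh tracks the running total, which pvr_uscgen_tq() then publishes as
 * sh_reg_layout->dynamic_consts.count. */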
"compiler/shader_enums.h" #include "pco/pco.h" +#include "usc/pvr_uscgen.h" /* NOP shader generation. */ pco_shader *pvr_usc_nop(pco_ctx *ctx, mesa_shader_stage stage); @@ -31,4 +32,8 @@ struct pvr_tq_props { pco_shader *pvr_usc_tq(pco_ctx *ctx, struct pvr_tq_props *props); +pco_shader *pvr_uscgen_tq(pco_ctx *ctx, + const struct pvr_tq_shader_properties *shader_props, + struct pvr_tq_frag_sh_reg_layout *sh_reg_layout); + #endif /* PVR_USC_H */