From 10e571aebd2ec4995ee44e5e01e614dadb8a1c25 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 1 Dec 2025 11:47:15 -0500 Subject: [PATCH] pan/compiler: Move pan_ir.h into pan_compiler.h There is nothing IR about it. It's really the compiler interface file, so it should all go in pan_compiler.h. Acked-by: Erik Faye-Lund Acked-by: Boris Brezillon Part-of: --- src/gallium/drivers/panfrost/pan_device.h | 2 +- .../drivers/panfrost/pan_mod_conv_cso.h | 2 +- src/panfrost/clc/pan_compile.c | 1 - .../compiler/bifrost/bifrost_compile.c | 1 - .../compiler/bifrost/bifrost_compile.h | 2 +- src/panfrost/compiler/bifrost/compiler.h | 2 +- src/panfrost/compiler/meson.build | 2 - src/panfrost/compiler/midgard/compiler.h | 2 +- .../compiler/midgard/midgard_compile.h | 2 +- src/panfrost/compiler/pan_compiler.c | 54 +++ src/panfrost/compiler/pan_compiler.h | 387 ++++++++++++++++- src/panfrost/compiler/pan_ir.c | 84 ---- src/panfrost/compiler/pan_ir.h | 410 ------------------ src/panfrost/compiler/pan_nir.h | 1 - src/panfrost/lib/pan_earlyzs.c | 2 +- src/panfrost/lib/pan_shader.h | 2 +- src/panfrost/lib/tests/test-earlyzs.cpp | 2 +- src/panfrost/vulkan/panvk_shader.h | 1 - 18 files changed, 447 insertions(+), 512 deletions(-) delete mode 100644 src/panfrost/compiler/pan_ir.c delete mode 100644 src/panfrost/compiler/pan_ir.h diff --git a/src/gallium/drivers/panfrost/pan_device.h b/src/gallium/drivers/panfrost/pan_device.h index cf18b0b33a1..b58c2743877 100644 --- a/src/gallium/drivers/panfrost/pan_device.h +++ b/src/gallium/drivers/panfrost/pan_device.h @@ -38,7 +38,7 @@ #include "util/timespec.h" #include "util/u_dynarray.h" -#include "panfrost/compiler/pan_ir.h" +#include "panfrost/compiler/pan_compiler.h" #include "pan_blend_cso.h" #include "pan_fb_preload.h" #include "pan_pool.h" diff --git a/src/gallium/drivers/panfrost/pan_mod_conv_cso.h b/src/gallium/drivers/panfrost/pan_mod_conv_cso.h index 7531cad44a8..47e1283482a 100644 --- 
a/src/gallium/drivers/panfrost/pan_mod_conv_cso.h +++ b/src/gallium/drivers/panfrost/pan_mod_conv_cso.h @@ -26,7 +26,7 @@ #include "util/hash_table.h" -#include "panfrost/compiler/pan_ir.h" +#include "panfrost/compiler/pan_compiler.h" #include "drm-uapi/drm_fourcc.h" diff --git a/src/panfrost/clc/pan_compile.c b/src/panfrost/clc/pan_compile.c index 5feed5103dd..f1ca34932fa 100644 --- a/src/panfrost/clc/pan_compile.c +++ b/src/panfrost/clc/pan_compile.c @@ -26,7 +26,6 @@ #include #include #include -#include "panfrost/compiler/pan_ir.h" #include "util/macros.h" #include "util/u_dynarray.h" #include "util/u_printf.h" diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 070d914cde7..6e281284db3 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -31,7 +31,6 @@ #include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_deref.h" #include "panfrost/compiler/pan_compiler.h" -#include "panfrost/compiler/pan_ir.h" #include "panfrost/compiler/pan_nir.h" #include "util/perf/cpu_trace.h" #include "util/u_debug.h" diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.h b/src/panfrost/compiler/bifrost/bifrost_compile.h index 56ac939cb63..7a21768b796 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.h +++ b/src/panfrost/compiler/bifrost/bifrost_compile.h @@ -27,7 +27,7 @@ #include #include #include "compiler/nir/nir.h" -#include "panfrost/compiler/pan_ir.h" +#include "panfrost/compiler/pan_compiler.h" #include "util/u_dynarray.h" struct bifrost_precompiled_kernel_sysvals { diff --git a/src/panfrost/compiler/bifrost/compiler.h b/src/panfrost/compiler/bifrost/compiler.h index c337bf1b7d5..29d54fa2071 100644 --- a/src/panfrost/compiler/bifrost/compiler.h +++ b/src/panfrost/compiler/bifrost/compiler.h @@ -28,7 +28,7 @@ #define __BIFROST_COMPILER_H #include "compiler/nir/nir.h" -#include "panfrost/compiler/pan_ir.h" +#include 
"panfrost/compiler/pan_compiler.h" #include "util/half_float.h" #include "util/shader_stats.h" #include "util/u_math.h" diff --git a/src/panfrost/compiler/meson.build b/src/panfrost/compiler/meson.build index bad5804d777..4ce16f92dd7 100644 --- a/src/panfrost/compiler/meson.build +++ b/src/panfrost/compiler/meson.build @@ -4,8 +4,6 @@ libpanfrost_compiler_files = files( 'pan_compiler.c', 'pan_compiler.h', - 'pan_ir.c', - 'pan_ir.h', 'pan_nir_collect_varyings.c', 'pan_nir_lower_frag_coord_zw.c', 'pan_nir_lower_framebuffer.c', diff --git a/src/panfrost/compiler/midgard/compiler.h b/src/panfrost/compiler/midgard/compiler.h index b936539ffdb..4786dcaaa15 100644 --- a/src/panfrost/compiler/midgard/compiler.h +++ b/src/panfrost/compiler/midgard/compiler.h @@ -38,7 +38,7 @@ #include "compiler/glsl_types.h" #include "compiler/nir/nir.h" -#include "panfrost/compiler/pan_ir.h" +#include "panfrost/compiler/pan_compiler.h" /* Forward declare */ struct midgard_block; diff --git a/src/panfrost/compiler/midgard/midgard_compile.h b/src/panfrost/compiler/midgard/midgard_compile.h index f03ce69abc5..13c9e7eb948 100644 --- a/src/panfrost/compiler/midgard/midgard_compile.h +++ b/src/panfrost/compiler/midgard/midgard_compile.h @@ -26,7 +26,7 @@ #define __MIDGARD_H_ #include "compiler/nir/nir.h" -#include "panfrost/compiler/pan_ir.h" +#include "panfrost/compiler/pan_compiler.h" #include "util/u_dynarray.h" void midgard_preprocess_nir(nir_shader *nir, unsigned gpu_id); diff --git a/src/panfrost/compiler/pan_compiler.c b/src/panfrost/compiler/pan_compiler.c index 0a1741a477e..966c691578e 100644 --- a/src/panfrost/compiler/pan_compiler.c +++ b/src/panfrost/compiler/pan_compiler.c @@ -108,6 +108,60 @@ pan_nir_lower_texture_late(nir_shader *nir, unsigned gpu_id) bifrost_lower_texture_late_nir(nir, gpu_id); } +/** Converts a per-component mask to a byte mask */ +uint16_t +pan_to_bytemask(unsigned bytes, unsigned mask) +{ + switch (bytes) { + case 0: + assert(mask == 0); + return 0; + + case 
8: + return mask; + + case 16: { + unsigned space = + (mask & 0x1) | ((mask & 0x2) << (2 - 1)) | ((mask & 0x4) << (4 - 2)) | + ((mask & 0x8) << (6 - 3)) | ((mask & 0x10) << (8 - 4)) | + ((mask & 0x20) << (10 - 5)) | ((mask & 0x40) << (12 - 6)) | + ((mask & 0x80) << (14 - 7)); + + return space | (space << 1); + } + + case 32: { + unsigned space = (mask & 0x1) | ((mask & 0x2) << (4 - 1)) | + ((mask & 0x4) << (8 - 2)) | ((mask & 0x8) << (12 - 3)); + + return space | (space << 1) | (space << 2) | (space << 3); + } + + case 64: { + unsigned A = (mask & 0x1) ? 0xFF : 0x00; + unsigned B = (mask & 0x2) ? 0xFF : 0x00; + return A | (B << 8); + } + + default: + UNREACHABLE("Invalid register mode"); + } +} + +/* Could optimize with a better data structure if anyone cares, TODO: profile */ +unsigned +pan_lookup_pushed_ubo(struct pan_ubo_push *push, unsigned ubo, unsigned offs) +{ + struct pan_ubo_word word = {.ubo = ubo, .offset = offs}; + + for (unsigned i = 0; i < push->count; ++i) { + if (memcmp(push->words + i, &word, sizeof(word)) == 0) + return i; + } + + UNREACHABLE("UBO not pushed"); +} + void pan_disassemble(FILE *fp, const void *code, size_t size, unsigned gpu_id, bool verbose) diff --git a/src/panfrost/compiler/pan_compiler.h b/src/panfrost/compiler/pan_compiler.h index 71227cd4de6..0bf4f09accb 100644 --- a/src/panfrost/compiler/pan_compiler.h +++ b/src/panfrost/compiler/pan_compiler.h @@ -28,17 +28,398 @@ #include #include -typedef struct nir_shader nir_shader; -struct nir_shader_compiler_options; +#include "compiler/nir/nir_defines.h" +#include "compiler/shader_enums.h" +#include "util/u_dynarray.h" +#include "util/format/u_formats.h" +#include "util/shader_stats.h" + struct pan_shader_info; -const struct nir_shader_compiler_options * +const nir_shader_compiler_options * pan_get_nir_shader_compiler_options(unsigned arch); void pan_preprocess_nir(nir_shader *nir, unsigned gpu_id); void pan_optimize_nir(nir_shader *nir, unsigned gpu_id); void 
pan_postprocess_nir(nir_shader *nir, unsigned gpu_id); +/* Indices for named (non-XFB) varyings that are present. These are packed + * tightly so they correspond to a bitfield present (P) indexed by (1 << + * PAN_VARY_*). This has the nice property that you can lookup the buffer index + * of a given special field given a shift S by: + * + * idx = popcount(P & ((1 << S) - 1)) + * + * That is... look at all of the varyings that come earlier and count them, the + * count is the new index since plus one. Likewise, the total number of special + * buffers required is simply popcount(P) + */ + +enum pan_special_varying { + PAN_VARY_GENERAL = 0, + PAN_VARY_POSITION = 1, + PAN_VARY_PSIZ = 2, + PAN_VARY_PNTCOORD = 3, + PAN_VARY_FACE = 4, + PAN_VARY_FRAGCOORD = 5, + + /* Keep last */ + PAN_VARY_MAX, +}; + +/* Maximum number of attribute descriptors required for varyings. These include + * up to MAX_VARYING source level varyings plus a descriptor each non-GENERAL + * special varying */ +#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1) + +/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be + * consistent with the blob so we can compare traces easier. */ + +enum { PAN_VERTEX_ID = 16, PAN_INSTANCE_ID = 17, PAN_MAX_ATTRIBUTE }; + +/* Architecturally, Bifrost/Valhall can address 128 FAU slots of 64-bits each. + * In practice, the maximum number of FAU slots is limited by implementation. + * All known Bifrost and Valhall devices limit to 64 FAU slots. Therefore the + * maximum number of 32-bit words is 128, since there are 2 words per FAU slot. + * + * Midgard can push at most 92 words, so this bound suffices. The Midgard + * compiler pushes less than this, as Midgard uses register-mapped uniforms + * instead of FAU, preventing large numbers of uniforms to be pushed for + * nontrivial programs. + */ +#define PAN_MAX_PUSH 128 + +/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so + * an offset to a word must be < 2^16. 
There are less than 2^8 UBOs */ + +struct pan_ubo_word { + uint16_t ubo; + uint16_t offset; +}; + +struct pan_ubo_push { + unsigned count; + struct pan_ubo_word words[PAN_MAX_PUSH]; +}; + +/* Helper for searching the above. Note this is O(N) to the number of pushed + * constants, do not run in the draw call hot path */ + +unsigned pan_lookup_pushed_ubo(struct pan_ubo_push *push, unsigned ubo, + unsigned offs); + +struct pan_compile_inputs { + unsigned gpu_id; + uint32_t gpu_variant; + /* Used on Bifrost and Valhall for pixel_local_storage load/store to convert + * the format to a descriptor. + */ + uint64_t (*get_conv_desc)(enum pipe_format fmt, unsigned rt, + unsigned force_size, bool dithered); + bool is_blend, is_blit; + struct { + unsigned nr_samples; + uint64_t bifrost_blend_desc; + } blend; + bool no_idvs; + uint32_t view_mask; + + nir_variable_mode robust2_modes; + /* Whether or not descriptor accesses should add additional robustness + * checks. */ + bool robust_descriptors; + + /* Mask of UBOs that may be moved to push constants */ + uint32_t pushable_ubos; + + /* Used on Valhall. + * + * Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0) + * written by the previous stage (fragment shader) or written by this + * stage (vertex shader). Bits are slots from gl_varying_slot. + * + * For modern APIs (GLES or VK), this should be 0. + */ + uint32_t fixed_varying_mask; + + /* Settings to move constants into the FAU. */ + struct { + uint32_t *values; + /* In multiples of 32bit. */ + uint32_t max_amount; + /* In multiples of 32bit. */ + uint32_t offset; + } fau_consts; + + union { + struct { + uint32_t rt_conv[8]; + } bifrost; + struct { + /* Use LD_VAR_BUF[_IMM] instead of LD_VAR[_IMM] to load varyings. 
*/ + bool use_ld_var_buf; + } valhall; + }; +}; + +struct pan_shader_varying { + gl_varying_slot location; + enum pipe_format format; +}; + +struct bifrost_shader_blend_info { + nir_alu_type type; + uint32_t return_offset; + + /* mali_bifrost_register_file_format corresponding to nir_alu_type */ + unsigned format; +}; + +/* + * Unpacked form of a v7 message preload descriptor, produced by the compiler's + * message preload optimization. By splitting out this struct, the compiler does + * not need to know about data structure packing, avoiding a dependency on + * GenXML. + */ +struct bifrost_message_preload { + /* Whether to preload this message */ + bool enabled; + + /* Varying to load from */ + unsigned varying_index; + + /* Register type, FP32 otherwise */ + bool fp16; + + /* Number of components, ignored if texturing */ + unsigned num_components; + + /* If texture is set, performs a texture instruction according to + * texture_index, skip, and zero_lod. If texture is unset, only the + * varying load is performed. + */ + bool texture, skip, zero_lod; + unsigned texture_index; +}; + +struct bifrost_shader_info { + struct bifrost_shader_blend_info blend[8]; + nir_alu_type blend_src1_type; + bool wait_6, wait_7; + struct bifrost_message_preload messages[2]; + + /* Whether any flat varyings are loaded. This may disable optimizations + * that change the provoking vertex, since that would load incorrect + * values for flat varyings. 
+ */ + bool uses_flat_shading; +}; + +struct midgard_shader_info { + unsigned first_tag; + union { + struct { + bool reads_raw_vertex_id; + } vs; + }; +}; + +struct pan_shader_info { + mesa_shader_stage stage; + unsigned work_reg_count; + unsigned tls_size; + unsigned wls_size; + + struct pan_stats stats, stats_idvs_varying; + + /* Bit mask of preloaded registers */ + uint64_t preload; + + uint32_t fau_consts_count; + uint32_t fau_consts[128]; + + union { + struct { + bool reads_frag_coord; + bool reads_point_coord; + bool reads_primitive_id; + bool reads_face; + bool can_discard; + bool writes_depth; + bool writes_stencil; + bool writes_coverage; + bool sidefx; + bool sample_shading; + bool early_fragment_tests; + bool can_early_z, can_fpk; + bool untyped_color_outputs; + uint32_t outputs_read; + } fs; + + struct { + bool writes_point_size; + + /* True if this shader needs the extended FIFO format for + * more than just point size. + */ + bool needs_extended_fifo; + + /* If the primary shader writes point size, the Valhall + * driver may need a variant that does not write point + * size. Offset to such a shader in the program binary. + * + * Zero if no such variant is required. + * + * Only used with IDVS on Valhall. + */ + unsigned no_psiz_offset; + + /* Set if Index-Driven Vertex Shading is in use */ + bool idvs; + + /* If IDVS is used, whether a varying shader is used */ + bool secondary_enable; + + /* If a varying shader is used, the varying shader's + * offset in the program binary + */ + unsigned secondary_offset; + + /* If IDVS is in use, number of work registers used by + * the varying shader + */ + unsigned secondary_work_reg_count; + + /* If IDVS is in use, bit mask of preloaded registers + * used by the varying shader + */ + uint64_t secondary_preload; + } vs; + + struct { + /* Is it legal to merge workgroups? This is true if the + * shader uses neither barriers nor shared memory. 
This + * requires caution: if the API allows specifying shared + * memory at launch time (instead of compile time), that + * memory will not be accounted for by the compiler. + * + * Used by the Valhall hardware. + */ + bool allow_merging_workgroups; + } cs; + }; + + /* Does the shader contain a barrier? or (for fragment shaders) does it + * require helper invocations, which demand the same ordering guarantees + * of the hardware? These notions are unified in the hardware, so we + * unify them here as well. + */ + bool contains_barrier; + bool separable; + bool writes_global; + uint64_t outputs_written; + + /* Floating point controls that the driver should try to honour */ + bool ftz_fp16, ftz_fp32; + + /* True if the shader contains a shader_clock instruction. */ + bool has_shader_clk_instr; + + unsigned sampler_count; + unsigned texture_count; + unsigned ubo_count; + unsigned attributes_read_count; + unsigned attribute_count; + unsigned attributes_read; + + struct { + unsigned input_count; + struct pan_shader_varying input[PAN_MAX_VARYINGS]; + unsigned output_count; + struct pan_shader_varying output[PAN_MAX_VARYINGS]; + + /* Bitfield of noperspective varyings, starting at VARYING_SLOT_VAR0 */ + uint32_t noperspective; + + /* Bitfield of special varyings. */ + uint32_t fixed_varyings; + } varyings; + + /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access + * Uniforms (Bifrost) */ + struct pan_ubo_push push; + + uint32_t ubo_mask; + + /* Quirk for GPUs that do not support auto32 types. */ + bool quirk_no_auto32; + + union { + struct bifrost_shader_info bifrost; + struct midgard_shader_info midgard; + }; +}; + +uint16_t pan_to_bytemask(unsigned bytes, unsigned mask); + +/* NIR passes to do some backend-specific lowering */ + +#define PAN_WRITEOUT_C 1 +#define PAN_WRITEOUT_Z 2 +#define PAN_WRITEOUT_S 4 +#define PAN_WRITEOUT_2 8 + +/* + * Helper returning the subgroup size. Generally, this is equal to the number of + * threads in a warp. 
For Midgard (including warping models), this returns 1, as + * subgroups are not supported. + */ +static inline unsigned +pan_subgroup_size(unsigned arch) +{ + if (arch >= 9) + return 16; + else if (arch >= 7) + return 8; + else if (arch >= 6) + return 4; + else + return 1; +} + +/* + * Helper extracting the table from a given handle of Valhall descriptor model. + */ +static inline unsigned +pan_res_handle_get_table(unsigned handle) +{ + unsigned table = handle >> 24; + + assert(table < 64); + return table; +} + +/* + * Helper returning the index from a given handle of Valhall descriptor model. + */ +static inline unsigned +pan_res_handle_get_index(unsigned handle) +{ + return handle & BITFIELD_MASK(24); +} + +/* + * Helper creating a handle for Valhall descriptor model. + */ +static inline unsigned +pan_res_handle(unsigned table, unsigned index) +{ + assert(table < 64); + assert(index < (1u << 24)); + + return (table << 24) | index; +} + void pan_disassemble(FILE *fp, const void *code, size_t size, unsigned gpu_id, bool verbose); diff --git a/src/panfrost/compiler/pan_ir.c b/src/panfrost/compiler/pan_ir.c deleted file mode 100644 index a19d7adf137..00000000000 --- a/src/panfrost/compiler/pan_ir.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (C) 2020 Collabora Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors (Collabora): - * Alyssa Rosenzweig - */ - -#include "pan_ir.h" -#include "util/macros.h" - -/* Converts a per-component mask to a byte mask */ - -uint16_t -pan_to_bytemask(unsigned bytes, unsigned mask) -{ - switch (bytes) { - case 0: - assert(mask == 0); - return 0; - - case 8: - return mask; - - case 16: { - unsigned space = - (mask & 0x1) | ((mask & 0x2) << (2 - 1)) | ((mask & 0x4) << (4 - 2)) | - ((mask & 0x8) << (6 - 3)) | ((mask & 0x10) << (8 - 4)) | - ((mask & 0x20) << (10 - 5)) | ((mask & 0x40) << (12 - 6)) | - ((mask & 0x80) << (14 - 7)); - - return space | (space << 1); - } - - case 32: { - unsigned space = (mask & 0x1) | ((mask & 0x2) << (4 - 1)) | - ((mask & 0x4) << (8 - 2)) | ((mask & 0x8) << (12 - 3)); - - return space | (space << 1) | (space << 2) | (space << 3); - } - - case 64: { - unsigned A = (mask & 0x1) ? 0xFF : 0x00; - unsigned B = (mask & 0x2) ? 
0xFF : 0x00; - return A | (B << 8); - } - - default: - UNREACHABLE("Invalid register mode"); - } -} - -/* Could optimize with a better data structure if anyone cares, TODO: profile */ - -unsigned -pan_lookup_pushed_ubo(struct pan_ubo_push *push, unsigned ubo, unsigned offs) -{ - struct pan_ubo_word word = {.ubo = ubo, .offset = offs}; - - for (unsigned i = 0; i < push->count; ++i) { - if (memcmp(push->words + i, &word, sizeof(word)) == 0) - return i; - } - - UNREACHABLE("UBO not pushed"); -} diff --git a/src/panfrost/compiler/pan_ir.h b/src/panfrost/compiler/pan_ir.h deleted file mode 100644 index 05c0b40d51d..00000000000 --- a/src/panfrost/compiler/pan_ir.h +++ /dev/null @@ -1,410 +0,0 @@ -/* - * Copyright (C) 2020 Collabora, Ltd. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef __PAN_IR_H -#define __PAN_IR_H - -#include -#include "compiler/nir/nir.h" -#include "util/hash_table.h" -#include "util/shader_stats.h" -#include "util/u_dynarray.h" - -/* Indices for named (non-XFB) varyings that are present. These are packed - * tightly so they correspond to a bitfield present (P) indexed by (1 << - * PAN_VARY_*). This has the nice property that you can lookup the buffer index - * of a given special field given a shift S by: - * - * idx = popcount(P & ((1 << S) - 1)) - * - * That is... look at all of the varyings that come earlier and count them, the - * count is the new index since plus one. Likewise, the total number of special - * buffers required is simply popcount(P) - */ - -enum pan_special_varying { - PAN_VARY_GENERAL = 0, - PAN_VARY_POSITION = 1, - PAN_VARY_PSIZ = 2, - PAN_VARY_PNTCOORD = 3, - PAN_VARY_FACE = 4, - PAN_VARY_FRAGCOORD = 5, - - /* Keep last */ - PAN_VARY_MAX, -}; - -/* Maximum number of attribute descriptors required for varyings. These include - * up to MAX_VARYING source level varyings plus a descriptor each non-GENERAL - * special varying */ -#define PAN_MAX_VARYINGS (MAX_VARYING + PAN_VARY_MAX - 1) - -/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be - * consistent with the blob so we can compare traces easier. */ - -enum { PAN_VERTEX_ID = 16, PAN_INSTANCE_ID = 17, PAN_MAX_ATTRIBUTE }; - -/* Architecturally, Bifrost/Valhall can address 128 FAU slots of 64-bits each. - * In practice, the maximum number of FAU slots is limited by implementation. - * All known Bifrost and Valhall devices limit to 64 FAU slots. Therefore the - * maximum number of 32-bit words is 128, since there are 2 words per FAU slot. - * - * Midgard can push at most 92 words, so this bound suffices. The Midgard - * compiler pushes less than this, as Midgard uses register-mapped uniforms - * instead of FAU, preventing large numbers of uniforms to be pushed for - * nontrivial programs. 
- */ -#define PAN_MAX_PUSH 128 - -/* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so - * an offset to a word must be < 2^16. There are less than 2^8 UBOs */ - -struct pan_ubo_word { - uint16_t ubo; - uint16_t offset; -}; - -struct pan_ubo_push { - unsigned count; - struct pan_ubo_word words[PAN_MAX_PUSH]; -}; - -/* Helper for searching the above. Note this is O(N) to the number of pushed - * constants, do not run in the draw call hot path */ - -unsigned pan_lookup_pushed_ubo(struct pan_ubo_push *push, unsigned ubo, - unsigned offs); - -struct pan_compile_inputs { - unsigned gpu_id; - uint32_t gpu_variant; - /* Used on Bifrost and Valhall for pixel_local_storage load/store to convert - * the format to a descriptor. - */ - uint64_t (*get_conv_desc)(enum pipe_format fmt, unsigned rt, - unsigned force_size, bool dithered); - bool is_blend, is_blit; - struct { - unsigned nr_samples; - uint64_t bifrost_blend_desc; - } blend; - bool no_idvs; - uint32_t view_mask; - - nir_variable_mode robust2_modes; - /* Whether or not descriptor accesses should add additional robustness - * checks. */ - bool robust_descriptors; - - /* Mask of UBOs that may be moved to push constants */ - uint32_t pushable_ubos; - - /* Used on Valhall. - * - * Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0) - * written by the previous stage (fragment shader) or written by this - * stage (vertex shader). Bits are slots from gl_varying_slot. - * - * For modern APIs (GLES or VK), this should be 0. - */ - uint32_t fixed_varying_mask; - - /* Settings to move constants into the FAU. */ - struct { - uint32_t *values; - /* In multiples of 32bit. */ - uint32_t max_amount; - /* In multiples of 32bit. */ - uint32_t offset; - } fau_consts; - - union { - struct { - uint32_t rt_conv[8]; - } bifrost; - struct { - /* Use LD_VAR_BUF[_IMM] instead of LD_VAR[_IMM] to load varyings. 
*/ - bool use_ld_var_buf; - } valhall; - }; -}; - -struct pan_shader_varying { - gl_varying_slot location; - enum pipe_format format; -}; - -struct bifrost_shader_blend_info { - nir_alu_type type; - uint32_t return_offset; - - /* mali_bifrost_register_file_format corresponding to nir_alu_type */ - unsigned format; -}; - -/* - * Unpacked form of a v7 message preload descriptor, produced by the compiler's - * message preload optimization. By splitting out this struct, the compiler does - * not need to know about data structure packing, avoiding a dependency on - * GenXML. - */ -struct bifrost_message_preload { - /* Whether to preload this message */ - bool enabled; - - /* Varying to load from */ - unsigned varying_index; - - /* Register type, FP32 otherwise */ - bool fp16; - - /* Number of components, ignored if texturing */ - unsigned num_components; - - /* If texture is set, performs a texture instruction according to - * texture_index, skip, and zero_lod. If texture is unset, only the - * varying load is performed. - */ - bool texture, skip, zero_lod; - unsigned texture_index; -}; - -struct bifrost_shader_info { - struct bifrost_shader_blend_info blend[8]; - nir_alu_type blend_src1_type; - bool wait_6, wait_7; - struct bifrost_message_preload messages[2]; - - /* Whether any flat varyings are loaded. This may disable optimizations - * that change the provoking vertex, since that would load incorrect - * values for flat varyings. 
- */ - bool uses_flat_shading; -}; - -struct midgard_shader_info { - unsigned first_tag; - union { - struct { - bool reads_raw_vertex_id; - } vs; - }; -}; - -struct pan_shader_info { - mesa_shader_stage stage; - unsigned work_reg_count; - unsigned tls_size; - unsigned wls_size; - - struct pan_stats stats, stats_idvs_varying; - - /* Bit mask of preloaded registers */ - uint64_t preload; - - uint32_t fau_consts_count; - uint32_t fau_consts[128]; - - union { - struct { - bool reads_frag_coord; - bool reads_point_coord; - bool reads_primitive_id; - bool reads_face; - bool can_discard; - bool writes_depth; - bool writes_stencil; - bool writes_coverage; - bool sidefx; - bool sample_shading; - bool early_fragment_tests; - bool can_early_z, can_fpk; - bool untyped_color_outputs; - BITSET_WORD outputs_read; - } fs; - - struct { - bool writes_point_size; - - /* True if this shader needs the extended FIFO format for - * more than just point size. - */ - bool needs_extended_fifo; - - /* If the primary shader writes point size, the Valhall - * driver may need a variant that does not write point - * size. Offset to such a shader in the program binary. - * - * Zero if no such variant is required. - * - * Only used with IDVS on Valhall. - */ - unsigned no_psiz_offset; - - /* Set if Index-Driven Vertex Shading is in use */ - bool idvs; - - /* If IDVS is used, whether a varying shader is used */ - bool secondary_enable; - - /* If a varying shader is used, the varying shader's - * offset in the program binary - */ - unsigned secondary_offset; - - /* If IDVS is in use, number of work registers used by - * the varying shader - */ - unsigned secondary_work_reg_count; - - /* If IDVS is in use, bit mask of preloaded registers - * used by the varying shader - */ - uint64_t secondary_preload; - } vs; - - struct { - /* Is it legal to merge workgroups? This is true if the - * shader uses neither barriers nor shared memory. 
This - * requires caution: if the API allows specifying shared - * memory at launch time (instead of compile time), that - * memory will not be accounted for by the compiler. - * - * Used by the Valhall hardware. - */ - bool allow_merging_workgroups; - } cs; - }; - - /* Does the shader contains a barrier? or (for fragment shaders) does it - * require helper invocations, which demand the same ordering guarantees - * of the hardware? These notions are unified in the hardware, so we - * unify them here as well. - */ - bool contains_barrier; - bool separable; - bool writes_global; - uint64_t outputs_written; - - /* Floating point controls that the driver should try to honour */ - bool ftz_fp16, ftz_fp32; - - /* True if the shader contains a shader_clock instruction. */ - bool has_shader_clk_instr; - - unsigned sampler_count; - unsigned texture_count; - unsigned ubo_count; - unsigned attributes_read_count; - unsigned attribute_count; - unsigned attributes_read; - - struct { - unsigned input_count; - struct pan_shader_varying input[PAN_MAX_VARYINGS]; - unsigned output_count; - struct pan_shader_varying output[PAN_MAX_VARYINGS]; - - /* Bitfield of noperspective varyings, starting at VARYING_SLOT_VAR0 */ - uint32_t noperspective; - - /* Bitfield of special varyings. */ - uint32_t fixed_varyings; - } varyings; - - /* UBOs to push to Register Mapped Uniforms (Midgard) or Fast Access - * Uniforms (Bifrost) */ - struct pan_ubo_push push; - - uint32_t ubo_mask; - - /* Quirk for GPUs that does not support auto32 types. */ - bool quirk_no_auto32; - - union { - struct bifrost_shader_info bifrost; - struct midgard_shader_info midgard; - }; -}; - -uint16_t pan_to_bytemask(unsigned bytes, unsigned mask); - -/* NIR passes to do some backend-specific lowering */ - -#define PAN_WRITEOUT_C 1 -#define PAN_WRITEOUT_Z 2 -#define PAN_WRITEOUT_S 4 -#define PAN_WRITEOUT_2 8 - -/* - * Helper returning the subgroup size. Generally, this is equal to the number of - * threads in a warp. 
For Midgard (including warping models), this returns 1, as - * subgroups are not supported. - */ -static inline unsigned -pan_subgroup_size(unsigned arch) -{ - if (arch >= 9) - return 16; - else if (arch >= 7) - return 8; - else if (arch >= 6) - return 4; - else - return 1; -} - -/* - * Helper extracting the table from a given handle of Valhall descriptor model. - */ -static inline unsigned -pan_res_handle_get_table(unsigned handle) -{ - unsigned table = handle >> 24; - - assert(table < 64); - return table; -} - -/* - * Helper returning the index from a given handle of Valhall descriptor model. - */ -static inline unsigned -pan_res_handle_get_index(unsigned handle) -{ - return handle & BITFIELD_MASK(24); -} - -/* - * Helper creating an handle for Valhall descriptor model. - */ -static inline unsigned -pan_res_handle(unsigned table, unsigned index) -{ - assert(table < 64); - assert(index < (1u << 24)); - - return (table << 24) | index; -} - -#endif diff --git a/src/panfrost/compiler/pan_nir.h b/src/panfrost/compiler/pan_nir.h index e63ebc37fce..a7afdf97724 100644 --- a/src/panfrost/compiler/pan_nir.h +++ b/src/panfrost/compiler/pan_nir.h @@ -27,7 +27,6 @@ #include "nir.h" #include "pan_compiler.h" -#include "pan_ir.h" struct util_format_description; diff --git a/src/panfrost/lib/pan_earlyzs.c b/src/panfrost/lib/pan_earlyzs.c index 9f55bc22661..5b936b92237 100644 --- a/src/panfrost/lib/pan_earlyzs.c +++ b/src/panfrost/lib/pan_earlyzs.c @@ -24,7 +24,7 @@ #include "genxml/gen_macros.h" #include "pan_earlyzs.h" -#include "panfrost/compiler/pan_ir.h" +#include "panfrost/compiler/pan_compiler.h" /* diff --git a/src/panfrost/lib/pan_shader.h b/src/panfrost/lib/pan_shader.h index c8a286cb389..54eb6147280 100644 --- a/src/panfrost/lib/pan_shader.h +++ b/src/panfrost/lib/pan_shader.h @@ -28,7 +28,7 @@ #include "compiler/nir/nir.h" #include "genxml/gen_macros.h" #include "panfrost/lib/pan_props.h" -#include "panfrost/compiler/pan_ir.h" +#include 
"panfrost/compiler/pan_compiler.h" static unsigned pan_get_fixed_varying_mask(unsigned varyings_used) diff --git a/src/panfrost/lib/tests/test-earlyzs.cpp b/src/panfrost/lib/tests/test-earlyzs.cpp index 2295a43aadd..670058fa964 100644 --- a/src/panfrost/lib/tests/test-earlyzs.cpp +++ b/src/panfrost/lib/tests/test-earlyzs.cpp @@ -21,7 +21,7 @@ * SOFTWARE. */ -#include "compiler/pan_ir.h" +#include "compiler/pan_compiler.h" #include "pan_earlyzs.h" #include "genxml/gen_macros.h" diff --git a/src/panfrost/vulkan/panvk_shader.h b/src/panfrost/vulkan/panvk_shader.h index 9fff935a499..4c04c6c5f41 100644 --- a/src/panfrost/vulkan/panvk_shader.h +++ b/src/panfrost/vulkan/panvk_shader.h @@ -11,7 +11,6 @@ #endif #include "compiler/pan_compiler.h" -#include "compiler/pan_ir.h" #include "pan_desc.h" #include "pan_earlyzs.h"