mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-22 06:20:22 +01:00
Backport-to: 25.1 Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36148>
484 lines
20 KiB
Python
484 lines
20 KiB
Python
# Copyright © 2024 Intel Corporation
|
|
|
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
# copy of this software and associated documentation files (the "Software"),
|
|
# to deal in the Software without restriction, including without limitation
|
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
# and/or sell copies of the Software, and to permit persons to whom the
|
|
# Software is furnished to do so, subject to the following conditions:
|
|
|
|
# The above copyright notice and this permission notice (including the next
|
|
# paragraph) shall be included in all copies or substantial portions of the
|
|
# Software.
|
|
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
# IN THE SOFTWARE.
|
|
|
|
from textwrap import dedent
|
|
|
|
# TYPES is an ordered list of all declarations in this file.
# Define, Enum and Struct instances append themselves here on construction,
# so the list order is the order in which declarations are written below.
TYPES = []

# TYPES_BY_NAME allows the lookup of any declaration
# Only Enum and Struct instances register themselves here (keyed by name).
TYPES_BY_NAME = {}
|
|
|
|
class Define:
    """Specifies a c macro definition.

    Constructing an instance appends it to the module-level TYPES list.
    It is not added to TYPES_BY_NAME.

    name    -- macro identifier.
    value   -- value the macro expands to.
    comment -- optional descriptive text stored alongside the macro.
    """
    def __init__(self, name, value, comment=None):
        self.name = name
        self.value = value
        self.comment = comment
        # Registration side effect: keep declaration order in TYPES.
        TYPES.append(self)
|
|
|
|
class EnumValue:
    """allows comments and setting of enum values

    name        -- enumerant identifier; also what str() of the object yields.
    value       -- optional explicit value for the enumerant.
    comment     -- optional descriptive text.
    group_begin -- optional group label; in this file it is set on the first
                   enumerant of a family (e.g. "DG2").
    group_end   -- optional group label; in this file it is set on the last
                   enumerant of a family.

    NOTE(review): how group_begin/group_end are consumed is not visible in
    this file -- confirm against the code generator.
    """
    def __init__(self, name, value=None, comment=None,
                 group_begin=None, group_end=None):
        self.name = name
        self.value = value
        self.comment = comment
        self.group_begin = group_begin
        self.group_end = group_end

    def __str__(self):
        return self.name
|
|
|
|
class Enum:
    """Stores details needed to declare and serialize an enumeration

    Constructing an instance appends it to TYPES and registers it in
    TYPES_BY_NAME under its name.

    name     -- name of the enumeration.
    values   -- iterable of enumerants; each item is either an EnumValue or
                any other object (plain strings in this file), which is
                wrapped as EnumValue(item).
    external -- flag stored as-is on the instance.
                NOTE(review): its meaning is decided by the consumer, which
                is not visible in this file.
    """
    def __init__(self, name, values, external=False):
        self.name = name
        # Normalize so that self.values holds only EnumValue objects.
        self.values = [v if isinstance(v, EnumValue) else EnumValue(v)
                       for v in values]

        self.external = external
        TYPES.append(self)
        TYPES_BY_NAME[name] = self
|
|
|
|
class Member:
    """Stores details needed to declare and serialize the member of a struct.

    member_type       -- type name of the member, as a string.
    name              -- member identifier.
    array             -- optional array length; callers in this file pass
                         either an int or a string holding a C expression.
    compiler_field    -- see the comment in __init__.
    ray_tracing_field -- flag stored as-is on the instance.
                         NOTE(review): its consumer is not visible in this
                         file.
    comment           -- optional descriptive text.
    """
    def __init__(self, member_type, name, array=None,
                 compiler_field=False, ray_tracing_field=False,
                 comment=None):
        self.member_type = member_type
        self.name = name
        self.array = array
        # indicates whether this field is used by the compiler, and whether it
        # should be included in the shader compiler cache hash function.
        self.compiler_field = compiler_field
        self.ray_tracing_field = ray_tracing_field
        self.comment = comment
|
|
|
|
class Struct:
    """Stores details needed to declare and serialize a struct

    Constructing an instance appends it to TYPES and registers it in
    TYPES_BY_NAME under its name.

    name    -- name of the struct.
    members -- the struct's members; stored as given, without copying
               (callers in this file pass a list of Member objects).
    """
    def __init__(self, name, members):
        self.name = name
        self.members = members
        TYPES.append(self)
        TYPES_BY_NAME[name] = self
|
|
|
|
# Names of the integer C types used as member types in this file.
INT_TYPES = {
    "uint8_t",
    "uint16_t",
    "uint32_t",
    "uint64_t",
    "unsigned",
    "int",
}

# Every integer type plus "char" and "bool".
FUNDAMENTAL_TYPES = {"char", "bool"} | INT_TYPES
|
|
|
|
# Macro definitions. Several are referenced by name in the array= sizes of
# the intel_device_info members declared later in this file.
Define("INTEL_DEVICE_MAX_NAME_SIZE", 64)
Define("INTEL_DEVICE_MAX_SLICES", 8)
Define("INTEL_DEVICE_MAX_SUBSLICES", 8, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_EUS_PER_SUBSLICE", 16, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_PIXEL_PIPES", 16, "Maximum on DG2")
|
|
|
|
# Platform enumeration. Only the first enumerant carries an explicit value
# (1). group_begin/group_end mark the first and last enumerant of the DG2,
# ATSM, MTL and ARL families.
Enum("intel_platform",
     [EnumValue("INTEL_PLATFORM_GFX3", value=1),
      "INTEL_PLATFORM_I965",
      "INTEL_PLATFORM_ILK",
      "INTEL_PLATFORM_G4X",
      "INTEL_PLATFORM_SNB",
      "INTEL_PLATFORM_IVB",
      "INTEL_PLATFORM_BYT",
      "INTEL_PLATFORM_HSW",
      "INTEL_PLATFORM_BDW",
      "INTEL_PLATFORM_CHV",
      "INTEL_PLATFORM_SKL",
      "INTEL_PLATFORM_BXT",
      "INTEL_PLATFORM_KBL",
      "INTEL_PLATFORM_GLK",
      "INTEL_PLATFORM_CFL",
      "INTEL_PLATFORM_ICL",
      "INTEL_PLATFORM_EHL",
      "INTEL_PLATFORM_TGL",
      "INTEL_PLATFORM_RKL",
      "INTEL_PLATFORM_DG1",
      "INTEL_PLATFORM_ADL",
      "INTEL_PLATFORM_RPL",
      EnumValue("INTEL_PLATFORM_DG2_G10", group_begin="DG2"),
      "INTEL_PLATFORM_DG2_G11",
      EnumValue("INTEL_PLATFORM_DG2_G12", group_end="DG2"),
      EnumValue("INTEL_PLATFORM_ATSM_G10", group_begin="ATSM"),
      EnumValue("INTEL_PLATFORM_ATSM_G11", group_end="ATSM"),
      EnumValue("INTEL_PLATFORM_MTL_U", group_begin="MTL"),
      EnumValue("INTEL_PLATFORM_MTL_H", group_end="MTL"),
      EnumValue("INTEL_PLATFORM_ARL_U", group_begin="ARL"),
      EnumValue("INTEL_PLATFORM_ARL_H", group_end="ARL"),
      "INTEL_PLATFORM_LNL",
      "INTEL_PLATFORM_BMG",
      "INTEL_PLATFORM_PTL",
      "INTEL_PLATFORM_WCL",
      ])
|
|
|
|
# Pair of ints (klass, instance) identifying a memory class instance.
Struct("intel_memory_class_instance",
       [Member("int", "klass",
               comment="Kernel backend specific class value, no translation needed yet"),
        Member("int", "instance")])
|
|
|
|
# mmap modes. Only the first enumerant carries an explicit value (0).
Enum("intel_device_info_mmap_mode",
     [EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_UC", value=0),
      EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WC"),
      EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WB"),
      EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_XD",
                comment=dedent("""\
                Xe2+ only. Only supported in GPU side and used for displayable
                buffers."""))
      ])
|
|
|
|
# One PAT entry: an index plus the CPU mmap mode to use with it.
Struct("intel_device_info_pat_entry",
       [Member("uint8_t", "index"),
        Member("intel_device_info_mmap_mode", "mmap",
               comment=dedent("""\
               This tells KMD what caching mode the CPU mapping should use.
               It has nothing to do with any PAT cache modes."""))])
|
|
|
|
# Cooperative matrix scope. Only the first enumerant carries an explicit
# value (0).
Enum("intel_cmat_scope",
     [EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
      "INTEL_CMAT_SCOPE_SUBGROUP"])
|
|
|
|
# Component types used for the a/b/c/result members of
# intel_cooperative_matrix_configuration.
Enum("intel_cooperative_matrix_component_type",
     ["INTEL_CMAT_FLOAT16",
      "INTEL_CMAT_FLOAT32",
      "INTEL_CMAT_SINT32",
      "INTEL_CMAT_SINT8",
      "INTEL_CMAT_UINT32",
      "INTEL_CMAT_UINT8",
      "INTEL_CMAT_BFLOAT16"])
|
|
|
|
# Engine classes. INTEL_ENGINE_CLASS_INVALID is listed last and is also used
# in this file as the array length of the per-engine-class members of
# intel_device_info, so new classes belong before it.
Enum("intel_engine_class",
     ["INTEL_ENGINE_CLASS_RENDER",
      "INTEL_ENGINE_CLASS_COPY",
      "INTEL_ENGINE_CLASS_VIDEO",
      "INTEL_ENGINE_CLASS_VIDEO_ENHANCE",
      "INTEL_ENGINE_CLASS_COMPUTE",
      "INTEL_ENGINE_CLASS_INVALID"])
|
|
|
|
# One cooperative matrix configuration: scope, the M/N/K dimensions and the
# component type of each of the four matrices.
Struct("intel_cooperative_matrix_configuration",
       [Member("intel_cmat_scope", "scope",
               comment=dedent("""\
               Matrix A is MxK.
               Matrix B is KxN.
               Matrix C and Matrix Result are MxN.

               Result = A * B + C;""")),
        Member("uint8_t", "m"),
        Member("uint8_t", "n"),
        Member("uint8_t", "k"),
        Member("intel_cooperative_matrix_component_type", "a"),
        Member("intel_cooperative_matrix_component_type", "b"),
        Member("intel_cooperative_matrix_component_type", "c"),
        Member("intel_cooperative_matrix_component_type", "result")])
|
|
|
|
# Kernel-mode driver types. This is the only enum in this file declared with
# external=True.
# NOTE(review): the effect of external=True is decided by the consumer, which
# is not visible in this file.
Enum("intel_kmd_type",
     ["INTEL_KMD_TYPE_INVALID",
      "INTEL_KMD_TYPE_I915",
      "INTEL_KMD_TYPE_XE",
      "INTEL_KMD_TYPE_STUB",
      "INTEL_KMD_TYPE_LAST"
      ], external=True)
|
|
|
|
# A memory region described by two 64-bit counters: size and free.
Struct("intel_device_info_mem_region",
       [Member("uint64_t", "size"),
        Member("uint64_t", "free")])
|
|
|
|
# A memory class instance together with its mappable and unmappable regions.
Struct("intel_device_info_ram_desc",
       [Member("intel_memory_class_instance", "mem"),
        Member("intel_device_info_mem_region", "mappable"),
        Member("intel_device_info_mem_region", "unmappable")])
|
|
|
|
# Memory description: one ram_desc each for sram and vram, plus a
# use_class_instance flag.
Struct("intel_device_info_mem_desc",
       [Member("bool", "use_class_instance"),
        Member("intel_device_info_ram_desc", "sram"),
        Member("intel_device_info_ram_desc", "vram")])
|
|
|
|
# URB description: a size plus 4-element min_entries / max_entries arrays.
Struct("intel_device_info_urb_desc",
       [Member("int", "size"),
        Member("int", "min_entries", array=4),
        Member("int", "max_entries", array=4)])
|
|
|
|
# Set of named PAT entries, one intel_device_info_pat_entry per use case.
Struct("intel_device_info_pat_desc",
       [Member("intel_device_info_pat_entry", "cached_coherent",
               comment="To be used when CPU access is frequent, WB + 1 or 2 way coherent"),

        Member("intel_device_info_pat_entry", "scanout",
               comment="scanout and external BOs"),

        Member("intel_device_info_pat_entry", "compressed_scanout",
               comment="Only supported in Xe2, compressed + WC for displayable resources"),

        Member("intel_device_info_pat_entry", "compressed",
               comment="Only supported in Xe2, compressed + WC for non-displayable resources"),

        Member("intel_device_info_pat_entry", "writeback_incoherent",
               comment=("BOs without special needs, can be WB not coherent "
                        "or WC it depends on the platforms and KMD")),

        Member("intel_device_info_pat_entry", "writecombining")])
|
|
|
|
# The main device description struct. Members flagged compiler_field=True are
# the ones used by the compiler and included in the shader compiler cache
# hash (per the comment on Member.compiler_field in this file). Array sizes
# given as strings are C expressions built from the Define() names and enum
# values declared earlier in this file, plus externally defined names
# (DIV_ROUND_UP, MESA_SHADER_STAGES).
Struct("intel_device_info",
       [Member("intel_kmd_type", "kmd_type"),

        Member("int", "ver", compiler_field=True,
               comment="Driver internal numbers used to differentiate platforms."),

        Member("int", "verx10", compiler_field=True),

        Member("uint32_t", "gfx_ip_ver", compiler_field=True,
               comment=dedent("""\
               This is the run-time hardware GFX IP version that may be more specific
               than ver/verx10. ver/verx10 may be more useful for comparing a class
               of devices whereas gfx_ip_ver may be more useful for precisely
               checking for a graphics ip type. GFX_IP_VER(major, minor) should be
               used to compare IP versions.""")),

        Member("int", "revision",
               comment=dedent("""\
               This revision is queried from KMD unlike
               pci_revision_id from drm device. Its value is not always
               same as the pci_revision_id.
               For LNL+ this is the stepping of GT IP/GMD RevId.""")),

        Member("int", "gt"),
        Member("uint16_t", "pci_domain", comment="PCI info"),
        Member("uint8_t", "pci_bus"),
        Member("uint8_t", "pci_dev"),
        Member("uint8_t", "pci_func"),
        Member("uint16_t", "pci_device_id"),
        Member("uint8_t", "pci_revision_id"),
        Member("intel_platform", "platform", compiler_field=True),
        Member("bool", "has_hiz_and_separate_stencil"),
        Member("bool", "has_sample_with_hiz"),
        Member("bool", "has_bit6_swizzle"),
        Member("bool", "has_llc"),
        Member("bool", "has_pln", compiler_field=True),
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
        Member("bool", "has_bfloat16", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "has_systolic", compiler_field=True),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),

        Member("bool", "has_illegal_ccs_values",
               comment="True if CCS needs to be initialized before use."),

        Member("bool", "has_flat_ccs",
               comment=dedent("""\
               True if CCS uses a flat virtual address translation to a memory
               carve-out, rather than aux map translations, or additional surfaces.""")),

        Member("bool", "has_aux_map"),
        Member("bool", "has_caching_uapi"),
        Member("bool", "has_tiling_uapi"),
        Member("bool", "has_ray_tracing", compiler_field=True),
        Member("bool", "has_ray_query"),
        Member("bool", "has_local_mem"),
        Member("bool", "has_lsc", compiler_field=True),
        Member("bool", "has_mesh_shading"),
        Member("bool", "has_mmap_offset"),
        Member("bool", "has_partial_mmap_offset"),
        Member("bool", "has_userptr_probe"),
        Member("bool", "has_context_isolation"),
        Member("bool", "has_set_pat_uapi"),
        Member("bool", "has_indirect_unroll"),

        Member("bool", "has_coarse_pixel_primitive_and_cb", compiler_field=True,
               comment=dedent("""\
               Whether this platform supports fragment shading rate controlled by a
               primitive in geometry shaders and by a control buffer.""")),

        Member("bool", "has_compute_engine", comment="Whether this platform has compute engine"),

        Member("bool", "needs_null_push_constant_tbimr_workaround",
               comment=dedent("""\
               Whether the platform needs an undocumented workaround for a hardware bug
               that affects draw calls with a pixel shader that has 0 push constant cycles
               when TBIMR is enabled, which has been seen to lead to hangs. To avoid the
               issue we simply pad the push constant payload to be at least 1 register.""")),

        Member("unsigned", "num_slices",
               comment=dedent("""\
               GPU hardware limits

               In general, you can find shader thread maximums by looking at the "Maximum
               Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
               3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
               limits come from the "Number of URB Entries" field in the
               3DSTATE_URB_VS command and friends.

               These fields are used to calculate the scratch space to allocate. The
               amount of scratch space can be larger without being harmful on modern
               GPUs, however, prior to Haswell, programming the maximum number of threads
               to greater than the hardware maximum would cause GPU performance to tank.

               Total number of slices present on the device whether or not they've been
               fused off.

               XXX: CS thread counts are limited by the inability to do cross subslice
               communication. It is the effectively the number of logical threads which
               can be executed in a subslice. Fuse configurations may cause this number
               to change, so we program @max_cs_threads as the lower maximum.""")),

        Member("unsigned", "max_slices", compiler_field=True,
               comment=dedent("""\
               Maximum number of slices present on this device (can be more than
               num_slices if some slices are fused).""")),

        Member("unsigned", "num_subslices", array="INTEL_DEVICE_MAX_SLICES",
               comment="Number of subslices for each slice (used to be uniform until CNL)."),

        Member("unsigned", "max_subslices_per_slice", compiler_field=True,
               comment=dedent("""\
               Maximum number of subslices per slice present on this device (can be
               more than the maximum value in the num_subslices[] array if some
               subslices are fused).

               This is GT_SS_PER_SLICE in SKU.""")),

        Member("unsigned", "ppipe_subslices", array="INTEL_DEVICE_MAX_PIXEL_PIPES",
               comment="Number of subslices on each pixel pipe (ICL)."),

        Member("unsigned", "max_eus_per_subslice", compiler_field=True,
               comment="Maximum number of EUs per subslice (some EUs can be fused off)."),

        Member("unsigned", "num_thread_per_eu", compiler_field=True,
               comment="Number of threads per eu, varies between 4 and 8 between generations."),

        Member("uint8_t", "grf_size",
               comment="Size of a register from the EU GRF file in bytes."),

        Member("uint8_t", "slice_masks",
               comment="A bit mask of the slices available."),

        Member("uint8_t", "subslice_masks",
               array="INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)",
               compiler_field=True,
               ray_tracing_field=True,
               comment=dedent("""\
               An array of bit mask of the subslices available, use subslice_slice_stride
               to access this array.""")),

        Member("unsigned", "subslice_total",
               comment=dedent("""\
               The number of enabled subslices (considering fusing). For exactly which
               subslices are enabled, see subslice_masks[].""")),

        Member("uint8_t", "eu_masks",
               array=("INTEL_DEVICE_MAX_SLICES * INTEL_DEVICE_MAX_SUBSLICES * "
                      "DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)"),
               comment=dedent("""\
               An array of bit mask of EUs available, use eu_slice_stride &
               eu_subslice_stride to access this array.""")),

        Member("uint16_t", "subslice_slice_stride", compiler_field=True,
               comment="Stride to access subslice_masks[]."),

        Member("uint16_t", "eu_slice_stride",
               comment="Strides to access eu_masks[]."),

        Member("uint16_t", "eu_subslice_stride"),
        Member("unsigned", "l3_banks"),

        Member("unsigned", "max_vs_threads",
               comment="Maximum Vertex Shader threads"),

        Member("unsigned", "max_tcs_threads",
               comment="Maximum Hull Shader threads"),

        Member("unsigned", "max_tes_threads",
               comment="Maximum Domain Shader threads"),

        Member("unsigned", "max_gs_threads",
               comment="Maximum Geometry Shader threads"),

        Member("unsigned", "max_wm_threads",
               comment=dedent("""\
               Theoretical maximum number of Pixel Shader threads.

               PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
               automatically scale pixel shader thread count, based on a single value
               programmed into 3DSTATE_PS.

               To calculate the maximum number of threads for Gfx8 beyond (which have
               multiple Pixel Shader Dispatchers):

               - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
               - Usually there's only one PSD per subslice, so use the number of
               subslices for number of PSDs.
               - For max_wm_threads, the total should be PSD threads * #PSDs.""")),

        Member("unsigned", "max_threads_per_psd"),

        Member("unsigned", "max_cs_threads",
               comment=dedent("""\
               Maximum Compute Shader threads per subslice.
               Actual maximum compute shader threads is max_cs_threads * subslices.

               Thread count * number of EUs per subslice""")),

        Member("unsigned", "max_cs_workgroup_threads", compiler_field=True,
               comment=dedent("""\
               Maximum number of threads per workgroup supported by the GPGPU_WALKER or
               COMPUTE_WALKER command.

               This may be smaller than max_cs_threads as it takes into account added
               restrictions on the GPGPU/COMPUTE_WALKER commands. While max_cs_threads
               expresses the total parallelism of the GPU, this expresses the maximum
               number of threads we can dispatch in a single workgroup.""")),


        Member("unsigned", "max_scratch_ids", array="MESA_SHADER_STAGES", compiler_field=True,
               comment=dedent("""\
               The maximum number of potential scratch ids. Due to hardware
               implementation details, the range of scratch ids may be larger than the
               number of subslices.""")),

        Member("uint32_t", "max_scratch_size_per_thread", compiler_field=True),

        Member("intel_device_info_urb_desc", "urb"),
        Member("unsigned", "max_constant_urb_size_kb"),
        Member("unsigned", "mesh_max_constant_urb_size_kb"),
        Member("unsigned", "engine_class_prefetch", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "engine_class_supported_count", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "mem_alignment"),
        Member("uint64_t", "timestamp_frequency"),
        Member("uint64_t", "aperture_bytes"),
        Member("uint64_t", "gtt_size"),
        Member("int", "simulator_id"),
        Member("char", "name", array="INTEL_DEVICE_MAX_NAME_SIZE"),
        Member("bool", "no_hw"),
        Member("bool", "probe_forced", comment="Device needed INTEL_FORCE_PROBE"),
        Member("intel_device_info_mem_desc", "mem"),
        Member("intel_device_info_pat_desc", "pat"),
        Member("intel_cooperative_matrix_configuration",
               "cooperative_matrix_configurations", array=16)]
       )
|