Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-20 20:20:18 +01:00)

Compare commits: main...mesa-25.3.0 (203 commits)
Commits (SHA1):

f7aeb0d677, 523eea18c5, 48b0dd2892, 25abf47e3e, 28ca4a48d6, 23665f9bd9, b7ce6abb6a, 0fbf00af9b, e2164fbc11, f651443a74,
b5a4245193, 76d66b72db, 09b856c367, e5f9980d50, 7f68450a6c, feccefbc86, a407dc6d83, 4a0cb910e4, 8ffc19f935, de44196bfb,
f0aeb824b9, 2cb191d7d1, b074ea9fe8, 42726a2afa, e42294ba9f, 3bb77d6906, efd2f1d61c, 2cd6bc199a, 5c3427b1fe, b6ae45d326,
14097ed79d, 0ef221f4a4, 263e1823d2, e334938384, f299249d8b, 594ae17ec9, 02ba16ec03, 1460a0319f, d48e4a3f3b, d3d820d6ef,
70e3af188d, b777384e99, c4e0f4d917, f3268818d5, 9e809986f6, 8a813632c3, b17381dc8d, 7aa2c70759, 11a4adec73, e21d417234,
dafde3434a, 38e258d5d4, 478d92171d, 96d959986c, c167b0a816, ec8518d123, 269a6fe030, 4b46e87296, 3f169d14d2, 3a63355583,
7fb0030c06, e96e71fb79, eface4be0d, 36aff3454b, 3d9d9ca09b, e817b525d8, 8eec239517, e9f677dff9, 4f5c1c6c75, 944ec88ca5,
7d4557bae8, 4482281292, 1dadb38d7b, ef9457d119, 9696921018, 30678337dd, eb2668aad7, e23c722170, 2ad12150e8, 3109237d7c,
775652e08b, 51285c6715, c70fd7f766, d527eedb15, 7c18540961, 90b6c3a8ac, 3dab73159b, 55a37838b9, 9bad1beb98, 3086692bcd,
ce6c6a7a57, 629a0a4dcc, 12c82aaa82, 1e885e7a88, 3ddddf78b4, 86313f9571, a46307a732, 0a0d08dfe0, 182877f3c8, 9aeac1e0a7,
46f0422165, f69d1abfcf, 770e095766, 205fe1a245, 093c7d9d8e, 2c67b0fac6, e082f6b6c0, a12369eb3d, 6670d0742b, a7a020dde6,
7e15070ee1, 0edb1852a7, 3ce875a2d0, fd777ce645, 315b688976, 3a71d94735, 8a2bf930bb, ac492d42be, 2e17fd0cb2, 9311f170c4,
3e227a04b1, f63a5df30b, 9ba765e3e3, 8010d0cd39, f1f32d557e, 05e5db1a4d, 5ae8474029, b3470359bf, 5e1a88cea0, 040453857b,
28e172e956, 74880f8954, f02f5e217f, d9636807f7, b768139858, 498a25cfb8, 9728bbf7b0, f142fdc273, 1c52a94428, 2cfd3c52b2,
606ebb042e, 424f37b348, 7f75931019, ba107091c2, b74000dbce, fb2273df78, 65eb3aed4b, a9653fa019, 159d397437, 6a7effe059,
2a0a2cc5b0, 3f9f4d79d3, cd253df92a, bfd09d9891, dcecd8fd1e, 1648f759c1, d5f7261ce5, 2c1c52a8c8, fe3a3b08c9, d9812eaea8,
be191ceff7, 49bfddbd11, 0182cde848, 94ec7c686d, 4202ea6c7f, 10475e8ac1, c1cf6e75ae, 2b8675fd86, e967da84a8, 2a8f2ff397,
7a30a71c45, 9c57c0a194, 425c49ebf2, 7b7cb63a14, 1941ada4a6, e982234bb6, dbbadebe13, 0d100cc078, f656d062e3, 847ad886d6,
5dcc65643c, ab7bda0a1b, a02d8d5767, 13fa1460dd, 14544ef278, 602b4a2924, 717e8a8caf, 40ff53c5b8, bf9e1f2e37, c3cf272a04,
30ba8880b4, 42ab1c6f3c, 674e2a702a, 756618ee3b, ca7d2daf5f, 45aafef631, 8711394383, 289c768e88, 84655b4b5d, fd6b9c70b6,
9bb7bf9c66, f510e6a1bd, 40f7bef16c
215 changed files with 20116 additions and 2391 deletions
```diff
@@ -34,7 +34,6 @@
 # anholt          | (decommissioned)               | @anholt
 # austriancoder   | ci-tron                        | @austriancoder
 # collabora       | lava                           | @daniels, @sergi
-# google-freedreno | none (moving to LAVA)         | @daniels, @sergi
 # igalia          | baremetal/poe-powered, ci-tron | @jasuarez, @chema
 # lima            | lava                           | @enunes
 # microsoft       | custom                         | @jenatali, @alatiera
@@ -293,15 +292,6 @@
       - !reference [.pengutronix-farm-rules, rules]


-# Temporary placeholder as the devices move across to LAVA.
-.google-freedreno-farm-rules:
-  rules:
-    - when: never
-
-.google-freedreno-farm-manual-rules:
-  rules:
-    - when: never
-
 # Skip container & build jobs when disabling any farm, and run them if any
 # farm gets re-enabled.
 # Only apply these rules in MR context, because otherwise we get a false
```
```diff
@@ -118,7 +118,6 @@ def main():
     # before we make it to 9-digit jobs (we're at 7 so far).
     nick = args.runner
     nick = nick.replace('mesa-', '')
-    nick = nick.replace('google-freedreno-', '')
     nick += f'-{args.job}'
     irc.send_line(f"NICK {nick}")
     irc.send_line(f"USER {nick} unused unused: Gitlab CI Notifier")
```
```diff
@@ -60,6 +60,8 @@
       - subprojects/**/*
       - .gitattributes
       - src/*
+      - src/android_stub/**/*
+      - src/c11/**/*
       - src/compiler/**/*
       - src/drm-shim/**/*
       - src/gtest/**/*
```
.pick_status.json (new file, 11872 lines) — file diff suppressed because it is too large.
VERSION (2 lines changed)

```diff
@@ -1 +1 @@
-25.3.0-devel
+25.3.0
```
```diff
@@ -122,9 +122,8 @@ Enable the site and restart nginx:
    # Second download should be cached.
    wget http://localhost/cache/?uri=https://s3.freedesktop.org/mesa-tracie-public/itoral-gl-terrain-demo/demo-v2.trace

-Now, set ``download-url`` in your ``traces-*.yml`` entry to something like
-``http://caching-proxy/cache/?uri=https://s3.freedesktop.org/mesa-tracie-public``
-and you should have cached downloads for traces. Add it to
-``FDO_HTTP_CACHE_URI=`` in your ``config.toml`` runner environment lines and you
-can use it for cached artifact downloads instead of going all the way to
-freedesktop.org on each job.
+The trace runner script automatically sets the caching proxy, so there's no
+need to modify anything in the Mesa CI YAML files.
+Add ``LAVA_HTTP_CACHE_URI=http://localhost/cache/?uri=`` to your ``config.toml``
+runner environment lines and you can use it for cached artifact downloads
+instead of going all the way to freedesktop.org on each job.
```
```diff
@@ -3,6 +3,7 @@ Release Notes

 The release notes summarize what's new or changed in each Mesa release.

+- :doc:`25.3.0 release notes <relnotes/25.3.0>`
 - :doc:`25.2.5 release notes <relnotes/25.2.5>`
 - :doc:`25.2.4 release notes <relnotes/25.2.4>`
 - :doc:`25.2.3 release notes <relnotes/25.2.3>`
@@ -466,6 +467,7 @@ The release notes summarize what's new or changed in each Mesa release.
    :maxdepth: 1
    :hidden:

+   25.3.0 <relnotes/25.3.0>
    25.2.5 <relnotes/25.2.5>
    25.2.4 <relnotes/25.2.4>
    25.2.3 <relnotes/25.2.3>
```
docs/relnotes/25.3.0.rst (new file, 6071 lines) — file diff suppressed because it is too large.
```diff
@@ -1,89 +0,0 @@
-EGL_EXT_create_context_robustness support on Panfrost V10+
-GL_ARB_robust_buffer_access_behavior, GL_KHR_robust_buffer_access_behavior and GL_KHR_robustness support on Panfrost
-VK_EXT_mutable_descriptor_type on panvk/v9+
-GL_KHR_robustness on v3d
-VK_ARM_shader_core_builtins on panvk
-VK_KHR_shader_untyped_pointers on anv
-cl_ext_immutable_memory_objects
-VK_KHR_video_encode_intra_refresh on radv
-VK_KHR_video_encode_quantization_map on radv
-GL_ATI_meminfo and GL_NVX_gpu_memory_info on r300
-VK_KHR_shader_untyped_pointers on anv and RADV
-VK_KHR_maintenance8 on NVK
-VK_KHR_maintenance9 on NVK
-cl_khr_semaphore on radeonsi and zink
-cl_khr_external_semaphore on radeonsi and zink
-cl_khr_external_semaphore_sync_fd on radeonsi and zink
-GL_NV_shader_atomic_int64 on radeonsi and Panfrost V9+
-VK_KHR_maintenance7 on panvk/v10+
-VK_KHR_maintenance8 on panvk/v10+
-VK_KHR_maintenance9 on panvk
-VK_AMD_buffer_marker on NVK
-VK_EXT_ycbcr_2plane_444_formats on radv
-Removed VDPAU frontend
-GL_NV_representative_fragment_test on zink
-VK_KHR_maintenance9 on HoneyKrisp
-sparseBinding on panvk/v10+
-sparseResidencyBuffer on panvk/v10+
-Vulkan 1.2 on pvr
-VK_KHR_create_renderpass2 on pvr
-VK_KHR_dedicated_allocation on pvr
-VK_KHR_depth_stencil_resolve on pvr
-VK_KHR_descriptor_update_template on pvr
-VK_KHR_imageless_framebuffer on pvr
-VK_KHR_line_rasterization on pvr
-VK_KHR_maintenance1 on pvr
-VK_KHR_maintenance2 on pvr
-VK_KHR_maintenance3 on pvr
-VK_KHR_multiview on pvr
-VK_KHR_robustness2 on pvr
-VK_KHR_separate_depth_stencil_layouts on pvr
-VK_KHR_shader_draw_parameters on pvr
-VK_KHR_shader_float_controls on pvr
-VK_KHR_shader_subgroup_extended_types on pvr
-VK_KHR_spirv_1_4 on pvr
-VK_KHR_shader_terminate_invocation on pvr
-VK_KHR_swapchain_mutable_format on pvr
-VK_KHR_vertex_attribute_divisor on pvr
-VK_EXT_border_color_swizzle on pvr
-VK_EXT_color_write_enable on pvr
-VK_EXT_custom_border_color on pvr
-VK_EXT_depth_clamp_zero_one on pvr
-VK_EXT_depth_clip_enable on pvr
-VK_EXT_extended_dynamic_state on pvr
-VK_EXT_extended_dynamic_state2 on pvr
-VK_EXT_extended_dynamic_state3 on pvr
-VK_EXT_image_2d_view_of_3d on pvr
-VK_EXT_line_rasterization on pvr
-VK_EXT_physical_device_drm on pvr
-VK_EXT_provoking_vertex on pvr
-VK_EXT_robustness2 on pvr
-VK_EXT_queue_family_foreign on pvr
-VK_EXT_separate_stencil_usage on pvr
-VK_EXT_shader_demote_to_helper_invocation on pvr
-VK_EXT_vertex_attribute_divisor on pvr
-imageCubeArray on pvr
-independentBlend on pvr
-sampleRateShading on pvr
-logicOp on pvr
-drawIndirectFirstInstance on pvr
-alphaToOne on pvr
-samplerAnisotropy on pvr
-shaderStorageImageExtendedFormats on pvr
-shaderStorageImageReadWithoutFormat on pvr
-shaderStorageImageWriteWithoutFormat on pvr
-shaderClipDistance on pvr
-shaderCullDistance on pvr
-VK_EXT_zero_initialize_device_memory on pvr
-VK_KHR_sampler_mirror_clamp_to_edge on pvr
-VK_KHR_shader_non_semantic_info on pvr
-VK_KHR_shader_relaxed_extended_instruction on pvr
-VK_EXT_shader_replicated_composites on pvr
-VK_KHR_device_group_creation on pvr
-VK_KHR_map_memory2 on pvr
-VK_EXT_map_memory_placed on pvr
-VK_KHR_device_group on pvr
-VK_KHR_buffer_device_address on pvr
-GL_EXT_mesh_shader on zink
-VK_KHR_wayland_surface on pvr
-VK_NVX_image_view_handle on NVK
```
```diff
@@ -1489,8 +1489,6 @@ struct drm_amdgpu_info_hw_ip {
 	__u32 available_rings;
 	/** version info: bits 23:16 major, 15:8 minor, 7:0 revision */
 	__u32 ip_discovery_version;
-	/* Userq available slots */
-	__u32 userq_num_slots;
 };

 /* GFX metadata BO sizes and alignment info (in bytes) */
```
```diff
@@ -979,14 +979,20 @@ extern "C" {
  * 2 = Gob Height 8, Turing+ Page Kind mapping
  * 3 = Reserved for future use.
  *
- * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further
- *           bit remapping step that occurs at an even lower level than the
- *           page kind and block linear swizzles. This causes the layout of
- *           surfaces mapped in those SOC's GPUs to be incompatible with the
- *           equivalent mapping on other GPUs in the same system.
+ * 22:22 s Sector layout. There is a further bit remapping step that occurs
+ * 26:27    at an even lower level than the page kind and block linear
+ *           swizzles. This causes the bit arrangement of surfaces in memory
+ *           to differ subtly, and prevents direct sharing of surfaces between
+ *           GPUs with different layouts.
  *
- * 0 = Tegra K1 - Tegra Parker/TX2 Layout.
- * 1 = Desktop GPU and Tegra Xavier+ Layout
+ * 0 = Tegra K1 - Tegra Parker/TX2 Layout
+ * 1 = Pre-GB20x, GB20x 32+ bpp, GB10, Tegra Xavier-Orin Layout
+ * 2 = GB20x(Blackwell 2)+ 8 bpp surface layout
+ * 3 = GB20x(Blackwell 2)+ 16 bpp surface layout
+ * 4 = Reserved for future use.
+ * 5 = Reserved for future use.
+ * 6 = Reserved for future use.
+ * 7 = Reserved for future use.
  *
  * 25:23 c Lossless Framebuffer Compression type.
  *
@@ -1001,7 +1007,7 @@ extern "C" {
  * 6 = Reserved for future use
  * 7 = Reserved for future use
  *
- * 55:25 - Reserved for future use. Must be zero.
+ * 55:28 - Reserved for future use. Must be zero.
  */
 #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \
 	fourcc_mod_code(NVIDIA, (0x10 | \
@@ -1009,6 +1015,7 @@ extern "C" {
 	                         (((k) & 0xff) << 12) | \
 	                         (((g) & 0x3) << 20) | \
 	                         (((s) & 0x1) << 22) | \
+	                         (((s) & 0x6) << 25) | \
 	                         (((c) & 0x7) << 23)))

 /* To grandfather in prior block linear format modifiers to the above layout,
@@ -1017,7 +1024,7 @@ extern "C" {
  * which corresponds to the "generic" kind used for simple single-sample
  * uncompressed color formats on Fermi - Volta GPUs.
  */
-static __inline__ __u64
+static inline __u64
 drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier)
 {
 	if (!(modifier & 0x10) || (modifier & (0xff << 12)))
```
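For clarity, here is a small self-contained sketch (illustrative only, not part of drm_fourcc.h) of how the widened sector-layout field is packed and recovered under the updated macro: bit 0 of `s` stays at modifier bit 22, while bits 1–2 of `s` land in modifier bits 26–27, matching the `(((s) & 0x1) << 22)` and `(((s) & 0x6) << 25)` terms above.

```c
#include <stdint.h>
#include <stdio.h>

/* Pack only the sector-layout contribution of the modifier. */
static uint64_t pack_sector_layout(uint64_t s)
{
    return ((s & 0x1) << 22) | ((s & 0x6) << 25);
}

/* Recover the 3-bit 's' value from a modifier. */
static uint64_t unpack_sector_layout(uint64_t mod)
{
    return ((mod >> 22) & 0x1) | ((mod >> 25) & 0x6);
}

int main(void)
{
    for (uint64_t s = 0; s < 8; s++) {
        uint64_t bits = pack_sector_layout(s);
        printf("s=%llu -> modifier bits 0x%llx -> s=%llu\n",
               (unsigned long long)s, (unsigned long long)bits,
               (unsigned long long)unpack_sector_layout(bits));
    }
    return 0;
}
```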
```diff
@@ -2191,7 +2191,7 @@ endif

 with_sysprof = get_option('sysprof')
 if with_sysprof
-  dep_sysprof = dependency('sysprof-capture-4', version: '>= 3.38.0')
+  dep_sysprof = dependency('sysprof-capture-4', version: '>= 4.49.0')
   pre_args += '-DHAVE_SYSPROF'
 endif
```
```diff
@@ -315,8 +315,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
          info->ip[ip_type].num_queues = 1;
       } else if (ip_info.available_rings) {
          info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
-      } else if (ip_info.userq_num_slots) {
-         info->ip[ip_type].num_queue_slots = ip_info.userq_num_slots;
       } else {
          continue;
       }
@@ -1696,11 +1694,11 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f)
    fprintf(f, "    clock_crystal_freq = %i KHz\n", info->clock_crystal_freq);

    for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) {
-      if (info->ip[i].num_queues || info->ip[i].num_queue_slots) {
-         fprintf(f, "    IP %-7s %2u.%u \tqueues:%u \tqueue_slots:%u \talign:%u \tpad_dw:0x%x\n",
+      if (info->ip[i].num_queues) {
+         fprintf(f, "    IP %-7s %2u.%u \tqueues:%u \talign:%u \tpad_dw:0x%x\n",
                  ac_get_ip_type_string(info, i),
                  info->ip[i].ver_major, info->ip[i].ver_minor, info->ip[i].num_queues,
-                 info->ip[i].num_queue_slots,info->ip[i].ib_alignment, info->ip[i].ib_pad_dw_mask);
+                 info->ip[i].ib_alignment, info->ip[i].ib_pad_dw_mask);
       }
    }
```
```diff
@@ -26,7 +26,6 @@ struct amd_ip_info {
    uint8_t ver_minor;
    uint8_t ver_rev;
    uint8_t num_queues;
-   uint8_t num_queue_slots;
    uint8_t num_instances;
    uint32_t ib_alignment;
    uint32_t ib_pad_dw_mask;
```
```diff
@@ -194,7 +194,6 @@ struct drm_amdgpu_info_hw_ip {
    uint32_t ib_size_alignment;
    uint32_t available_rings;
    uint32_t ip_discovery_version;
-   uint32_t userq_num_slots;
 };

 struct drm_amdgpu_info_uq_fw_areas_gfx {
```
```diff
@@ -498,6 +498,8 @@ typedef struct rvcn_enc_hevc_encode_params_s {
 typedef struct rvcn_enc_av1_encode_params_s {
    uint32_t ref_frames[RENCODE_AV1_REFS_PER_FRAME];
    uint32_t lsm_reference_frame_index[2];
+   uint32_t cur_order_hint;
 } rvcn_enc_av1_encode_params_t;

 typedef struct rvcn_enc_h264_deblocking_filter_s {
```
```diff
@@ -109,23 +109,37 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
    nir_mem_access_size_align res;

    if (intrin == nir_intrinsic_load_shared || intrin == nir_intrinsic_store_shared) {
-      /* Split unsupported shared access. */
-      res.bit_size = MIN2(bit_size, combined_align * 8ull);
-      res.align = res.bit_size / 8;
       /* Don't use >64-bit LDS loads for performance reasons. */
       unsigned max_bytes = intrin == nir_intrinsic_store_shared && cb_data->gfx_level >= GFX7 ? 16 : 8;
       bytes = MIN3(bytes, combined_align, max_bytes);
       bytes = bytes == 12 ? bytes : round_down_to_power_of_2(bytes);

+      /* Split unsupported shared access. */
+      res.bit_size = MIN2(bit_size, bytes * 8ull);
+      res.align = res.bit_size / 8;
       res.num_components = bytes / res.align;
       res.shift = nir_mem_access_shift_method_bytealign_amd;
       return res;
    }

+   const bool is_buffer_load = intrin == nir_intrinsic_load_ubo ||
+                               intrin == nir_intrinsic_load_ssbo ||
+                               intrin == nir_intrinsic_load_constant;
+
    if (is_smem) {
+      const bool supported_subdword = cb_data->gfx_level >= GFX12 &&
+                                      intrin != nir_intrinsic_load_push_constant &&
+                                      (!cb_data->use_llvm || intrin != nir_intrinsic_load_ubo);
+
       /* Round up subdword loads if unsupported. */
-      const bool supported_subdword = cb_data->gfx_level >= GFX12 && intrin != nir_intrinsic_load_push_constant;
-      if (bit_size < 32 && (bytes >= 3 || !supported_subdword))
+      if (bytes <= 2 && combined_align % bytes == 0 && supported_subdword) {
+         bit_size = bytes * 8;
+      } else if (bytes % 4 || combined_align % 4) {
+         if (is_buffer_load)
+            bytes += 4 - MIN2(combined_align, 4);
          bytes = align(bytes, 4);
+         bit_size = 32;
+      }

       /* Generally, require an alignment of 4. */
       res.align = MIN2(4, bytes);
@@ -138,9 +152,6 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
    if (!util_is_power_of_two_nonzero(bytes) && (cb_data->gfx_level < GFX12 || bytes != 12)) {
       const uint8_t larger = util_next_power_of_two(bytes);
       const uint8_t smaller = larger / 2;
-      const bool is_buffer_load = intrin == nir_intrinsic_load_ubo ||
-                                  intrin == nir_intrinsic_load_ssbo ||
-                                  intrin == nir_intrinsic_load_constant;
       const bool is_aligned = align_mul % smaller == 0;

       /* Overfetch up to 1 dword if this is a bounds-checked buffer load or the access is aligned. */
@@ -185,8 +196,8 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui

    const uint32_t max_pad = 4 - MIN2(combined_align, 4);

-   /* Global loads don't have bounds checking, so increasing the size might not be safe. */
-   if (intrin == nir_intrinsic_load_global || intrin == nir_intrinsic_load_global_constant) {
+   /* Global/scratch loads don't have bounds checking, so increasing the size might not be safe. */
+   if (!is_buffer_load) {
       if (align_mul < 4) {
          /* If we split the load, only lower it to 32-bit if this is a SMEM load. */
         const unsigned chunk_bytes = align(bytes, 4) - max_pad;
```
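The reworked SMEM path can be summarized by the following illustrative helper (a sketch under simplified assumptions, not the driver code): subdword (1–2 byte) loads are kept native only when the hardware path supports them; otherwise the access is widened to whole dwords, and only bounds-checked buffer loads are allowed to pad past the original range.

```c
#include <stdbool.h>

static unsigned min2u(unsigned a, unsigned b) { return a < b ? a : b; }
static unsigned align4(unsigned v) { return (v + 3u) & ~3u; }

/* Returns the byte size the access is widened to; 'bytes' assumed nonzero. */
static unsigned smem_access_bytes(unsigned bytes, unsigned combined_align,
                                  bool supported_subdword, bool is_buffer_load)
{
    /* Native 8/16-bit SMEM load: only when aligned and hardware supports it. */
    if (bytes <= 2 && combined_align % bytes == 0 && supported_subdword)
        return bytes;

    if (bytes % 4 || combined_align % 4) {
        /* Bounds-checked buffer loads may overfetch to absorb misalignment. */
        if (is_buffer_load)
            bytes += 4 - min2u(combined_align, 4);
        bytes = align4(bytes); /* widen to whole dwords */
    }
    return bytes;
}
```

For example, `smem_access_bytes(2, 2, true, false)` keeps a native 2-byte load on GFX12, while the same access without subdword support becomes a 4-byte dword load.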
```diff
@@ -1817,10 +1817,25 @@ ac_ngg_get_scratch_lds_size(mesa_shader_stage stage,
    } else {
       assert(stage == MESA_SHADER_GEOMETRY);

+      /* Repacking output vertices at the end in ngg_gs_finale() uses 1 dword per 4 waves */
       scratch_lds_size = ALIGN(max_num_waves, 4u);
-      /* streamout take 8 dwords for buffer offset and emit vertex per stream */
-      if (streamout_enabled)
-         scratch_lds_size = MAX2(scratch_lds_size, 32);
+      /* For streamout:
+       * - Repacking streamout vertices takes 1 dword per 4 waves per stream
+       *   (max 16 bytes for Wave64, 32 bytes for Wave32)
+       * - 1 dword per stream for buffer info
+       *   (16 bytes)
+       * - 1 dword per buffer for buffer info
+       *   (16 bytes)
+       */
+      if (streamout_enabled) {
+         const unsigned num_streams = 4;
+         const unsigned num_so_buffers = 4;
+         const unsigned streamout_scratch_size =
+            num_streams * ALIGN(max_num_waves, 4u) + num_streams * 4 + num_so_buffers * 4;
+
+         scratch_lds_size += streamout_scratch_size;
+      }
    }

    return scratch_lds_size;
```
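As a worked example of the new sizing (assumed values, mirroring the formula in the hunk above): with `max_num_waves = 8`, the GS scratch area grows from 8 bytes (vertex repack only) to 72 bytes once streamout is enabled.

```c
#include <stdio.h>

#define ALIGN4(x) (((x) + 3u) & ~3u)

int main(void)
{
    const unsigned max_num_waves = 8;  /* assumed example value */
    const unsigned num_streams = 4;
    const unsigned num_so_buffers = 4;

    unsigned size = ALIGN4(max_num_waves);       /* vertex repack: 8 bytes */
    size += num_streams * ALIGN4(max_num_waves)  /* per-stream repack: 32  */
          + num_streams * 4                      /* per-stream info:   16  */
          + num_so_buffers * 4;                  /* per-buffer info:   16  */

    printf("scratch_lds_size = %u bytes\n", size); /* 8 + 32 + 16 + 16 = 72 */
    return 0;
}
```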
```diff
@@ -660,6 +660,10 @@ ngg_gs_build_streamout(nir_builder *b, lower_ngg_gs_state *s)
    nir_def *export_seq[4] = {0};
    nir_def *out_vtx_primflag[4] = {0};

+   const unsigned scratch_stride = ALIGN(s->max_num_waves, 4);
+   const unsigned scratch_base_off = scratch_stride;
+   const unsigned num_streams = util_bitcount(info->streams_written);
+
    u_foreach_bit(stream, info->streams_written) {
       out_vtx_primflag[stream] =
          ngg_gs_load_out_vtx_primflag(b, stream, tid_in_tg, out_vtx_lds_addr, max_vtxcnt, s);
@@ -669,9 +673,8 @@ ngg_gs_build_streamout(nir_builder *b, lower_ngg_gs_state *s)
       */
      prim_live[stream] = nir_i2b(b, nir_iand_imm(b, out_vtx_primflag[stream], 1));

-      unsigned scratch_stride = ALIGN(s->max_num_waves, 4);
      nir_def *scratch_base =
-         nir_iadd_imm(b, s->lds_addr_gs_out_vtx, stream * scratch_stride);
+         nir_iadd_imm(b, s->lds_addr_gs_out_vtx, stream * scratch_stride + scratch_base_off);

      /* We want to export primitives to streamout buffer in sequence,
       * but not all vertices are alive or mark end of a primitive, so
@@ -697,18 +700,14 @@ ngg_gs_build_streamout(nir_builder *b, lower_ngg_gs_state *s)
       export_seq[stream] = rep.repacked_invocation_index;
    }

-   /* Workgroup barrier: wait for LDS scratch reads finish. */
-   nir_barrier(b, .execution_scope = SCOPE_WORKGROUP,
-                  .memory_scope = SCOPE_WORKGROUP,
-                  .memory_semantics = NIR_MEMORY_ACQ_REL,
-                  .memory_modes = nir_var_mem_shared);
-
    /* Get global buffer offset where this workgroup will stream out data to. */
    nir_def *emit_prim[4] = {0};
    nir_def *buffer_offsets[4] = {0};
    nir_def *so_buffer[4] = {0};
+   nir_def *buffer_info_scratch_base =
+      nir_iadd_imm_nuw(b, s->lds_addr_gs_out_vtx, num_streams * scratch_stride + scratch_base_off);
    ac_nir_ngg_build_streamout_buffer_info(b, info, s->options->hw_info->gfx_level, s->options->has_xfb_prim_query,
-                                          s->options->use_gfx12_xfb_intrinsic, s->lds_addr_gs_out_vtx, tid_in_tg,
+                                          s->options->use_gfx12_xfb_intrinsic, buffer_info_scratch_base, tid_in_tg,
                                           gen_prim, so_buffer, buffer_offsets, emit_prim);

    u_foreach_bit(stream, info->streams_written) {
```
```diff
@@ -508,6 +508,8 @@ lower_ms_intrinsic(nir_builder *b, nir_instr *instr, void *state)
       return update_ms_barrier(b, intrin, s);
    case nir_intrinsic_load_workgroup_index:
       return lower_ms_load_workgroup_index(b, intrin, s);
+   case nir_intrinsic_load_num_subgroups:
+      return nir_imm_int(b, DIV_ROUND_UP(s->api_workgroup_size, s->wave_size));
    case nir_intrinsic_set_vertex_and_primitive_count:
       return lower_ms_set_vertex_and_primitive_count(b, intrin, s);
    default:
@@ -529,6 +531,7 @@ filter_ms_intrinsic(const nir_instr *instr,
          intrin->intrinsic == nir_intrinsic_store_per_primitive_output ||
          intrin->intrinsic == nir_intrinsic_barrier ||
          intrin->intrinsic == nir_intrinsic_load_workgroup_index ||
+         intrin->intrinsic == nir_intrinsic_load_num_subgroups ||
          intrin->intrinsic == nir_intrinsic_set_vertex_and_primitive_count;
 }
```
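The lowered `load_num_subgroups` is just a compile-time constant derived from the API workgroup size and the wave size. A trivial check of the arithmetic (assumed example values):

```c
#include <stdio.h>

#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

int main(void)
{
    /* A 128-invocation mesh-shader workgroup on Wave32 hardware sees 4 subgroups. */
    printf("num_subgroups = %u\n", DIV_ROUND_UP(128u, 32u)); /* prints 4 */
    return 0;
}
```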
```diff
@@ -338,6 +338,17 @@ Only `s_waitcnt_vscnt null, 0`. Needed even if the first instruction is a load.
 NSA MIMG instructions should be limited to 3 dwords before GFX10.3 to avoid
 stability issues: https://reviews.llvm.org/D103348

+## RDNA2 / GFX10.3 hazards
+
+### SALU EXEC write followed by NSA MIMG instruction
+
+Triggered-by:
+Potential stability issues can occur if an SALU instruction changes exec from 0
+to non-zero immediately before an NSA MIMG instruction with 4+ dwords.
+
+Mitigated-by: Any instruction, including `s_nop`.
+
+
 ## RDNA3 / GFX11 hazards

 ### VcmpxPermlaneHazard
```
```diff
@@ -129,6 +129,7 @@ struct NOP_ctx_gfx10 {
    bool has_branch_after_DS = false;
    bool has_NSA_MIMG = false;
    bool has_writelane = false;
+   bool has_salu_exec_write = false;
    std::bitset<128> sgprs_read_by_VMEM;
    std::bitset<128> sgprs_read_by_VMEM_store;
    std::bitset<128> sgprs_read_by_DS;
@@ -145,6 +146,7 @@ struct NOP_ctx_gfx10 {
       has_branch_after_DS |= other.has_branch_after_DS;
       has_NSA_MIMG |= other.has_NSA_MIMG;
       has_writelane |= other.has_writelane;
+      has_salu_exec_write |= other.has_salu_exec_write;
       sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
       sgprs_read_by_DS |= other.sgprs_read_by_DS;
       sgprs_read_by_VMEM_store |= other.sgprs_read_by_VMEM_store;
@@ -159,6 +161,7 @@ struct NOP_ctx_gfx10 {
             has_branch_after_VMEM == other.has_branch_after_VMEM && has_DS == other.has_DS &&
             has_branch_after_DS == other.has_branch_after_DS &&
             has_NSA_MIMG == other.has_NSA_MIMG && has_writelane == other.has_writelane &&
+            has_salu_exec_write == other.has_salu_exec_write &&
             sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
             sgprs_read_by_DS == other.sgprs_read_by_DS &&
             sgprs_read_by_VMEM_store == other.sgprs_read_by_VMEM_store &&
@@ -907,6 +910,15 @@ handle_instruction_gfx10(State& state, NOP_ctx_gfx10& ctx, aco_ptr<Instruction>&
       ctx.waits_since_fp_atomic = std::min(ctx.waits_since_fp_atomic, 3);
    }

+   /* 4+ dword NSA can hang if exec becomes non-zero again directly before the instruction. */
+   if (instr->isSALU() && instr->writes_exec()) {
+      ctx.has_salu_exec_write = true;
+   } else if (ctx.has_salu_exec_write) {
+      ctx.has_salu_exec_write = false;
+      if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1)
+         bld.sopp(aco_opcode::s_nop, 0);
+   }
+
    if (state.program->gfx_level != GFX10)
       return; /* no other hazards/bugs to mitigate */

@@ -2019,13 +2031,15 @@ required_export_priority(Program* program)
 void
 insert_NOPs(Program* program)
 {
+   bool has_previous_part =
+      program->is_epilog || program->info.vs.has_prolog || program->info.ps.has_prolog ||
+      (program->info.merged_shader_compiled_separately && program->stage.sw != SWStage::VS &&
+       program->stage.sw != SWStage::TES) ||
+      program->stage == raytracing_cs;
+
    if (program->gfx_level >= GFX11) {
       NOP_ctx_gfx11 initial_ctx;

-      bool has_previous_part =
-         program->is_epilog || program->info.vs.has_prolog || program->info.ps.has_prolog ||
-         (program->info.merged_shader_compiled_separately && program->stage.sw != SWStage::VS &&
-          program->stage.sw != SWStage::TES) || program->stage == raytracing_cs;
       if (program->gfx_level >= GFX12 && has_previous_part) {
          /* resolve_all_gfx11 can't resolve VALUReadSGPRHazard entirely. We have to assume that any
           * SGPR might have been read by VALU if there was a previous shader part.
@@ -2036,7 +2050,10 @@ insert_NOPs(Program* program)
       mitigate_hazards<NOP_ctx_gfx11, handle_instruction_gfx11, resolve_all_gfx11>(program,
                                                                                    initial_ctx);
    } else if (program->gfx_level >= GFX10) {
-      mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10, resolve_all_gfx10>(program);
+      NOP_ctx_gfx10 initial_ctx;
+      initial_ctx.has_salu_exec_write = has_previous_part;
+      mitigate_hazards<NOP_ctx_gfx10, handle_instruction_gfx10, resolve_all_gfx10>(program,
+                                                                                   initial_ctx);
    } else {
       mitigate_hazards<NOP_ctx_gfx6, handle_instruction_gfx6, resolve_all_gfx6>(program);
    }
```
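A simplified C sketch of the tracking added above (flattened, assumed types; the real code lives in the C++ `NOP_ctx_gfx10`): an SALU write to exec arms the hazard, and the next instruction disarms it, requesting an `s_nop` first if it is an NSA MIMG encoding with extra address dwords.

```c
#include <stdbool.h>

struct insn {
   bool is_salu_exec_write; /* SALU instruction that writes exec */
   bool is_mimg;            /* MIMG instruction */
   unsigned nsa_dwords;     /* extra NSA address dwords in the encoding */
};

/* Returns true if an s_nop must be emitted before 'i'. */
static bool needs_nop(bool *has_salu_exec_write, const struct insn *i)
{
   if (i->is_salu_exec_write) {
      *has_salu_exec_write = true;  /* arm the hazard */
      return false;
   }
   bool hazard = *has_salu_exec_write && i->is_mimg && i->nsa_dwords > 1;
   *has_salu_exec_write = false;    /* any instruction disarms it */
   return hazard;
}
```

Seeding the flag from `has_previous_part`, as the diff does, conservatively assumes a preceding shader part may have ended with an exec write.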
```diff
@@ -214,6 +214,8 @@ select_rt_prolog(Program* program, ac_shader_config* config,
    bld.sop2(Builder::s_cselect, Definition(vcc, bld.lm),
             Operand::c32_or_c64(-1u, program->wave_size == 64),
             Operand::c32_or_c64(0, program->wave_size == 64), Operand(scc, s1));
+   bld.sop2(aco_opcode::s_cselect_b32, Definition(out_launch_size_y, s1),
+            Operand(out_launch_size_y, s1), Operand::c32(1), Operand(scc, s1));
    bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[0], v1),
             Operand(tmp_invocation_idx, v1), Operand(out_launch_ids[0], v1), Operand(vcc, bld.lm));
    bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[1], v1), Operand::zero(),
```
```diff
@@ -338,6 +338,22 @@ load_unaligned_vs_attrib(Builder& bld, PhysReg dst, Operand desc, Operand index,
    state->current_loads.push_back(load);
 }

+bool
+is_last_attribute_large(const struct aco_vs_prolog_info* pinfo)
+{
+   const struct ac_vtx_format_info* vtx_info_table =
+      ac_get_vtx_format_info_table(GFX8, CHIP_POLARIS10);
+   unsigned last_attribute = pinfo->num_attributes - 1;
+
+   if ((pinfo->misaligned_mask & (1u << last_attribute))) {
+      const struct ac_vtx_format_info* vtx_info = &vtx_info_table[pinfo->formats[last_attribute]];
+      if (vtx_info->chan_byte_size == 8 && vtx_info->num_channels > 2)
+         return true;
+   }
+
+   return false;
+}
+
 } // namespace

 void
@@ -393,9 +409,11 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
       has_nontrivial_divisors && (program->gfx_level <= GFX8 || program->gfx_level >= GFX11);

    int vgpr_offset = pinfo->misaligned_mask & (1u << (pinfo->num_attributes - 1)) ? 0 : -4;
+   const bool is_last_attr_large = is_last_attribute_large(pinfo);

    unsigned num_vgprs = args->num_vgprs_used;
-   PhysReg attributes_start = get_next_vgpr(pinfo->num_attributes * 4, &num_vgprs);
+   PhysReg attributes_start =
+      get_next_vgpr(pinfo->num_attributes * 4 + (is_last_attr_large ? 4 : 0), &num_vgprs);
    PhysReg vertex_index, instance_index, start_instance_vgpr, nontrivial_tmp_vgpr0,
       nontrivial_tmp_vgpr1;
    if (needs_vertex_index)
@@ -625,6 +643,14 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
       continue_pc = Operand(prolog_input, s2);
    }

+   /* Wait for all pending VMEM loads when the prolog loads large 64-bit
+    * attributes because the vertex shader isn't required to consume all of
+    * them and they might be overwritten. This isn't the most optimal solution
+    * but 64-bit vertex attributes are rarely used.
+    */
+   if (is_last_attr_large)
+      wait_for_vmem_loads(bld);
+
    bld.sop1(aco_opcode::s_setpc_b64, continue_pc);

    program->config->float_mode = program->blocks[0].fp_mode.val;
```
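The case the prolog now handles can be illustrated with a small sketch (assumed, simplified struct shape, not the real `ac_vtx_format_info`): a misaligned attribute with 64-bit channels and more than two channels loads more than one 4-VGPR slot, which is why `attributes_start` reserves four extra VGPRs and the prolog waits on VMEM loads before jumping to the shader.

```c
#include <stdbool.h>

/* Simplified stand-in for the vertex format table entry. */
struct vtx_format_info {
   unsigned chan_byte_size; /* bytes per channel */
   unsigned num_channels;
};

static bool last_attr_needs_extra_vgprs(const struct vtx_format_info *fmt,
                                        bool misaligned)
{
   /* e.g. R64G64B64 / R64G64B64A64: 8-byte channels, 3+ channels means the
    * unaligned load spills past the usual 16 bytes (4 VGPRs) per attribute. */
   return misaligned && fmt->chan_byte_size == 8 && fmt->num_channels > 2;
}
```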
```diff
@@ -1329,7 +1329,6 @@ const struct amdgpu_device amdgpu_devices[] = {
       .ib_size_alignment = 32,
       .available_rings = 0x1,
       .ip_discovery_version = 0xb0000,
-      .userq_num_slots = 2,
    },
    .hw_ip_compute = {
       .hw_ip_version_major = 11,
@@ -1339,7 +1338,6 @@ const struct amdgpu_device amdgpu_devices[] = {
       .ib_size_alignment = 32,
       .available_rings = 0xf,
       .ip_discovery_version = 0xb0000,
-      .userq_num_slots = 16,
    },
    .fw_gfx_me = {
       .ver = 1486,
@@ -1460,7 +1458,6 @@ const struct amdgpu_device amdgpu_devices[] = {
       .ib_size_alignment = 32,
       .available_rings = 0x1,
       .ip_discovery_version = 0xb0002,
-      .userq_num_slots = 0x0,
    },
    .hw_ip_compute = {
       .hw_ip_version_major = 11,
@@ -1470,7 +1467,6 @@ const struct amdgpu_device amdgpu_devices[] = {
       .ib_size_alignment = 32,
       .available_rings = 0xf,
       .ip_discovery_version = 0xb0002,
-      .userq_num_slots = 0x0,
    },
    .fw_gfx_me = {
       .ver = 2390,
@@ -2070,7 +2066,6 @@ const struct amdgpu_device amdgpu_devices[] = {
       .ib_size_alignment = 32,
       .available_rings = 0x1,
       .ip_discovery_version = 0xb0500,
-      .userq_num_slots = 2,
    },
    .hw_ip_compute = {
       .hw_ip_version_major = 11,
@@ -2080,7 +2075,6 @@ const struct amdgpu_device amdgpu_devices[] = {
       .ib_size_alignment = 32,
       .available_rings = 0xf,
       .ip_discovery_version = 0xb0500,
-      .userq_num_slots = 16,
    },
    .fw_gfx_me = {
       .ver = 29,
@@ -2201,7 +2195,6 @@ const struct amdgpu_device amdgpu_devices[] = {
       .ib_size_alignment = 32,
       .available_rings = 0x1,
       .ip_discovery_version = 0xc0001,
-      .userq_num_slots = 8,
    },
    .hw_ip_compute = {
       .hw_ip_version_major = 12,
@@ -2211,7 +2204,6 @@ const struct amdgpu_device amdgpu_devices[] = {
       .ib_size_alignment = 32,
       .available_rings = 0xf,
       .ip_discovery_version = 0xc0001,
-      .userq_num_slots = 8,
    },
    .fw_gfx_me = {
       .ver = 2590,
@@ -379,7 +379,6 @@ amdgpu_dump_hw_ips(int fd)
       printf("   .ib_size_alignment = %u,\n", info.ib_size_alignment);
       printf("   .available_rings = 0x%x,\n", info.available_rings);
       printf("   .ip_discovery_version = 0x%04x,\n", info.ip_discovery_version);
-      printf("   .userq_num_slots = 0x%x,\n", info.userq_num_slots);
       printf("},\n");
    }
 }
```
src/amd/vulkan/layers/radv_no_mans_sky.c (new file, 35 lines)

```diff
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "radv_device.h"
+#include "radv_entrypoints.h"
+#include "radv_image_view.h"
+
+VKAPI_ATTR VkResult VKAPI_CALL
+no_mans_sky_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *pAllocator, VkImageView *pView)
+{
+   VK_FROM_HANDLE(radv_device, device, _device);
+   VkResult result;
+
+   result = device->layer_dispatch.app.CreateImageView(_device, pCreateInfo, pAllocator, pView);
+   if (result != VK_SUCCESS)
+      return result;
+
+   VK_FROM_HANDLE(radv_image_view, iview, *pView);
+
+   if ((iview->vk.aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) &&
+       (iview->vk.usage &
+        (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))) {
+      /* No Man's Sky creates descriptors with depth/stencil aspects (only when Intel XESS is
+       * enabled, apparently), and this is illegal in Vulkan. Ignore them by using NULL
+       * descriptors to work around GPU hangs.
+       */
+      memset(&iview->descriptor, 0, sizeof(iview->descriptor));
+   }
+
+   return result;
+}
```
|
||||||
'--device-prefix', 'metro_exodus',
|
'--device-prefix', 'metro_exodus',
|
||||||
'--device-prefix', 'rage2',
|
'--device-prefix', 'rage2',
|
||||||
'--device-prefix', 'quantic_dream',
|
'--device-prefix', 'quantic_dream',
|
||||||
|
'--device-prefix', 'no_mans_sky',
|
||||||
|
|
||||||
# Command buffer annotation layer entrypoints
|
# Command buffer annotation layer entrypoints
|
||||||
'--device-prefix', 'annotate',
|
'--device-prefix', 'annotate',
|
||||||
|
|
@ -40,6 +41,7 @@ libradv_files = files(
|
||||||
'layers/radv_metro_exodus.c',
|
'layers/radv_metro_exodus.c',
|
||||||
'layers/radv_rage2.c',
|
'layers/radv_rage2.c',
|
||||||
'layers/radv_quantic_dream.c',
|
'layers/radv_quantic_dream.c',
|
||||||
|
'layers/radv_no_mans_sky.c',
|
||||||
'layers/radv_rmv_layer.c',
|
'layers/radv_rmv_layer.c',
|
||||||
'layers/radv_rra_layer.c',
|
'layers/radv_rra_layer.c',
|
||||||
'layers/radv_sqtt_layer.c',
|
'layers/radv_sqtt_layer.c',
|
||||||
|
|
|
||||||
|
|
@ -6111,6 +6111,13 @@ radv_emit_tess_domain_origin_state(struct radv_cmd_buffer *cmd_buffer)
|
||||||
radeon_end();
|
radeon_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
radv_is_dual_src_enabled(const struct radv_dynamic_state *dynamic_state)
|
||||||
|
{
|
||||||
|
/* Dual-source blending must be ignored if blending isn't enabled for MRT0. */
|
||||||
|
return dynamic_state->blend_eq.mrt0_is_dual_src && !!(dynamic_state->color_blend_enable & 1u);
|
||||||
|
}
|
||||||
|
|
||||||
static struct radv_shader_part *
|
static struct radv_shader_part *
|
||||||
lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
|
lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
|
||||||
{
|
{
|
||||||
|
|
@ -6144,7 +6151,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
|
||||||
|
|
||||||
state.color_write_mask = d->color_write_mask;
|
state.color_write_mask = d->color_write_mask;
|
||||||
state.color_blend_enable = d->color_blend_enable;
|
state.color_blend_enable = d->color_blend_enable;
|
||||||
state.mrt0_is_dual_src = d->blend_eq.mrt0_is_dual_src;
|
state.mrt0_is_dual_src = radv_is_dual_src_enabled(&cmd_buffer->state.dynamic);
|
||||||
|
|
||||||
if (d->vk.ms.alpha_to_coverage_enable) {
|
if (d->vk.ms.alpha_to_coverage_enable) {
|
||||||
/* Select a color export format with alpha when alpha to coverage is enabled. */
|
/* Select a color export format with alpha when alpha to coverage is enabled. */
|
||||||
|
|
@ -8114,6 +8121,8 @@ radv_mark_descriptors_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPo
|
||||||
struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
|
struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
|
||||||
|
|
||||||
descriptors_state->dirty |= descriptors_state->valid;
|
descriptors_state->dirty |= descriptors_state->valid;
|
||||||
|
if (descriptors_state->dynamic_offset_count)
|
||||||
|
descriptors_state->dirty_dynamic = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
@ -8642,7 +8651,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
|
||||||
|
|
||||||
if (cmd_buffer->state.compute_pipeline == compute_pipeline)
|
if (cmd_buffer->state.compute_pipeline == compute_pipeline)
|
||||||
return;
|
return;
|
||||||
radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);
|
|
||||||
|
|
||||||
radv_bind_shader(cmd_buffer, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE);
|
radv_bind_shader(cmd_buffer, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE);
|
||||||
|
|
||||||
|
|
@ -8656,7 +8664,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
|
||||||
|
|
||||||
if (cmd_buffer->state.rt_pipeline == rt_pipeline)
|
if (cmd_buffer->state.rt_pipeline == rt_pipeline)
|
||||||
return;
|
return;
|
||||||
radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);
|
|
||||||
|
|
||||||
radv_bind_shader(cmd_buffer, rt_pipeline->base.base.shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION);
|
radv_bind_shader(cmd_buffer, rt_pipeline->base.base.shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION);
|
||||||
radv_bind_rt_prolog(cmd_buffer, rt_pipeline->prolog);
|
radv_bind_rt_prolog(cmd_buffer, rt_pipeline->prolog);
|
||||||
|
|
@ -8690,7 +8697,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
|
||||||
|
|
||||||
if (cmd_buffer->state.graphics_pipeline == graphics_pipeline)
|
if (cmd_buffer->state.graphics_pipeline == graphics_pipeline)
|
||||||
return;
|
return;
|
||||||
radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);
|
|
||||||
|
|
||||||
radv_foreach_stage (
|
radv_foreach_stage (
|
||||||
stage, (cmd_buffer->state.active_stages | graphics_pipeline->active_stages) & RADV_GRAPHICS_STAGE_BITS) {
|
stage, (cmd_buffer->state.active_stages | graphics_pipeline->active_stages) & RADV_GRAPHICS_STAGE_BITS) {
|
||||||
|
|
@ -8744,6 +8750,8 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
|
||||||
cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].dynamic_offset_count = pipeline->dynamic_offset_count;
|
cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].dynamic_offset_count = pipeline->dynamic_offset_count;
|
||||||
cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].need_indirect_descriptors =
|
cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].need_indirect_descriptors =
|
||||||
pipeline->need_indirect_descriptors;
|
pipeline->need_indirect_descriptors;
|
||||||
|
|
||||||
|
radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
VKAPI_ATTR void VKAPI_CALL
|
VKAPI_ATTR void VKAPI_CALL
|
||||||
|
|
@ -11688,7 +11696,7 @@ radv_emit_cb_render_state(struct radv_cmd_buffer *cmd_buffer)
|
||||||
const struct radv_rendering_state *render = &cmd_buffer->state.render;
|
const struct radv_rendering_state *render = &cmd_buffer->state.render;
|
||||||
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
|
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
|
||||||
unsigned cb_blend_control[MAX_RTS], sx_mrt_blend_opt[MAX_RTS];
|
unsigned cb_blend_control[MAX_RTS], sx_mrt_blend_opt[MAX_RTS];
|
||||||
const bool mrt0_is_dual_src = d->blend_eq.mrt0_is_dual_src;
|
const bool mrt0_is_dual_src = radv_is_dual_src_enabled(&cmd_buffer->state.dynamic);
|
||||||
uint32_t cb_color_control = 0;
|
uint32_t cb_color_control = 0;
|
||||||
|
|
||||||
const uint32_t cb_target_mask = d->color_write_enable & d->color_write_mask;
|
const uint32_t cb_target_mask = d->color_write_enable & d->color_write_mask;
|
||||||
|
|
|
||||||
|
|
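The gating that `radv_is_dual_src_enabled()` introduces is easy to see in a tiny standalone sketch (simplified state struct, assumed field meanings): when blending is disabled for MRT0 — bit 0 of the per-MRT enable mask — dual-source blending is reported as off even if the blend equation selects dual-source factors.

```c
#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-in for the dynamic blend state. */
struct dyn_state {
   bool mrt0_is_dual_src;       /* blend equation uses dual-source factors */
   uint32_t color_blend_enable; /* per-MRT blend-enable bitmask; bit 0 = MRT0 */
};

static bool is_dual_src_enabled(const struct dyn_state *d)
{
   return d->mrt0_is_dual_src && (d->color_blend_enable & 1u);
}

/* { .mrt0_is_dual_src = true, .color_blend_enable = 0x0 } -> false:
 * dual-source state is ignored when MRT0 blending is off. */
```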
```diff
@@ -792,6 +792,8 @@ init_dispatch_tables(struct radv_device *device, struct radv_physical_device *pd
       add_entrypoints(&b, &rage2_device_entrypoints, RADV_APP_DISPATCH_TABLE);
    } else if (!strcmp(instance->drirc.debug.app_layer, "quanticdream")) {
       add_entrypoints(&b, &quantic_dream_device_entrypoints, RADV_APP_DISPATCH_TABLE);
+   } else if (!strcmp(instance->drirc.debug.app_layer, "no_mans_sky")) {
+      add_entrypoints(&b, &no_mans_sky_device_entrypoints, RADV_APP_DISPATCH_TABLE);
    }

    if (instance->vk.trace_mode & RADV_TRACE_MODE_RGP)
```
@ -173,6 +173,7 @@ static const driOptionDescription radv_dri_options[] = {
|
||||||
DRI_CONF_VK_LOWER_TERMINATE_TO_DISCARD(false)
|
DRI_CONF_VK_LOWER_TERMINATE_TO_DISCARD(false)
|
||||||
DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
|
DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
|
||||||
DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
|
DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
|
||||||
|
DRI_CONF_VK_WSI_DISABLE_UNORDERED_SUBMITS(false)
|
||||||
DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
|
DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
|
||||||
DRI_CONF_VK_REQUIRE_ETC2(false)
|
DRI_CONF_VK_REQUIRE_ETC2(false)
|
||||||
DRI_CONF_VK_REQUIRE_ASTC(false)
|
DRI_CONF_VK_REQUIRE_ASTC(false)
|
||||||
|
|
@@ -200,6 +201,7 @@ static const driOptionDescription radv_dri_options[] = {
      DRI_CONF_RADV_EMULATE_RT(false)
      DRI_CONF_RADV_ENABLE_FLOAT16_GFX8(false)
      DRI_CONF_RADV_COOPERATIVE_MATRIX2_NV(false)
+      DRI_CONF_RADV_NO_IMPLICIT_VARYING_SUBGROUP_SIZE(false)
    DRI_CONF_SECTION_END
 };
 // clang-format on
@@ -236,6 +238,8 @@ radv_init_dri_debug_options(struct radv_instance *instance)
    drirc->debug.ssbo_non_uniform = driQueryOptionb(&drirc->options, "radv_ssbo_non_uniform");
    drirc->debug.tex_non_uniform = driQueryOptionb(&drirc->options, "radv_tex_non_uniform");
    drirc->debug.zero_vram = driQueryOptionb(&drirc->options, "radv_zero_vram");
+   drirc->debug.no_implicit_varying_subgroup_size =
+      driQueryOptionb(&drirc->options, "radv_no_implicit_varying_subgroup_size");
    drirc->debug.app_layer = driQueryOptionstr(&drirc->options, "radv_app_layer");
 
    drirc->debug.override_uniform_offset_alignment =
@@ -57,6 +57,7 @@ struct radv_drirc {
       bool ssbo_non_uniform;
       bool tex_non_uniform;
       bool zero_vram;
+      bool no_implicit_varying_subgroup_size;
       char *app_layer;
       int override_uniform_offset_alignment;
    } debug;
@@ -252,6 +252,7 @@ radv_physical_device_init_cache_key(struct radv_physical_device *pdev)
    key->use_llvm = pdev->use_llvm;
    key->use_ngg = pdev->use_ngg;
    key->use_ngg_culling = pdev->use_ngg_culling;
+   key->no_implicit_varying_subgroup_size = instance->drirc.debug.no_implicit_varying_subgroup_size;
 }
 
 static int
@@ -64,8 +64,9 @@ struct radv_physical_device_cache_key {
    uint32_t use_llvm : 1;
    uint32_t use_ngg : 1;
    uint32_t use_ngg_culling : 1;
+   uint32_t no_implicit_varying_subgroup_size : 1;
 
-   uint32_t reserved : 10;
+   uint32_t reserved : 9;
 };
 
 enum radv_video_enc_hw_ver {
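Note: the new drirc flag is carved out of the existing reserved field (10 bits down to 9), so the packed cache-key dword keeps its size. A minimal sketch, assuming the bitfields pack into a single uint32_t as in this hunk, of how that invariant can be pinned at compile time (the static_assert is illustrative, not part of the change, and the real struct carries more fields):

   #include <assert.h>
   #include <stdint.h>

   /* Illustrative only: if a flag is added without shrinking `reserved`,
    * the struct would grow past one dword and this fails to compile. */
   struct cache_key_bits {
      uint32_t use_llvm : 1;
      uint32_t use_ngg : 1;
      uint32_t use_ngg_culling : 1;
      uint32_t no_implicit_varying_subgroup_size : 1;
      uint32_t reserved : 9;
   };

   static_assert(sizeof(struct cache_key_bits) == sizeof(uint32_t),
                 "cache key bitfields must stay one dword");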
@@ -1247,9 +1247,13 @@ radv_pipeline_report_pso_history(const struct radv_device *device, struct radv_p
    case RADV_PIPELINE_RAY_TRACING: {
       struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
 
-      radv_print_pso_history(pipeline, rt_pipeline->prolog, output);
+      if (rt_pipeline->prolog)
+         radv_print_pso_history(pipeline, rt_pipeline->prolog, output);
 
-      for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) {
+      if (pipeline->shaders[MESA_SHADER_INTERSECTION])
+         radv_print_pso_history(pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION], output);
+
+      for (uint32_t i = 0; i < rt_pipeline->non_imported_stage_count; i++) {
         const struct radv_shader *shader = rt_pipeline->stages[i].shader;
 
         if (shader)
@@ -2383,8 +2383,9 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
          break;
       }
       case VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR: {
+         const bool write_memory = radv_video_write_memory_supported(pdev) == RADV_VIDEO_WRITE_MEMORY_SUPPORT_FULL;
         uint32_t *src32 = (uint32_t *)src;
-         uint32_t ready_idx = radv_video_write_memory_supported(pdev) ? RADV_ENC_FEEDBACK_STATUS_IDX : 1;
+         uint32_t ready_idx = write_memory ? RADV_ENC_FEEDBACK_STATUS_IDX : 1;
         uint32_t value;
         do {
            value = p_atomic_read(&src32[ready_idx]);
@@ -367,6 +367,10 @@ radv_shader_choose_subgroup_size(struct radv_device *device, nir_shader *nir,
       .requiredSubgroupSize = stage_key->subgroup_required_size * 32,
    };
 
+   /* Do not allow for the SPIR-V 1.6 varying subgroup size rules. */
+   if (pdev->cache_key.no_implicit_varying_subgroup_size)
+      spirv_version = 0x10000;
+
    vk_set_subgroup_size(&device->vk, nir, spirv_version, rss_info.requiredSubgroupSize ? &rss_info : NULL,
                         stage_key->subgroup_allow_varying, stage_key->subgroup_require_full);
 
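Note: the override works by handing vk_set_subgroup_size a pre-1.6 SPIR-V version; 0x10000 is the encoded form of SPIR-V 1.0, which predates the implicitly-varying subgroup size rules introduced in SPIR-V 1.6. A small sketch of the version encoding (the helper name is ours, not Mesa's):

   #include <stdint.h>

   /* SPIR-V stores its version as 0x00MMmm00: major in bits 16-23, minor
    * in bits 8-15. Hence 0x10000 encodes 1.0 and 0x10600 encodes 1.6. */
   static inline uint32_t spirv_version_word(uint32_t major, uint32_t minor)
   {
      return (major << 16) | (minor << 8);
   }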
@@ -191,6 +191,11 @@ declare_vs_input_vgprs(enum amd_gfx_level gfx_level, const struct radv_shader_in
    unsigned num_attributes = util_last_bit(info->vs.input_slot_usage_mask);
    for (unsigned i = 0; i < num_attributes; i++) {
       ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_VALUE, &args->vs_inputs[i]);
+
+      /* The vertex shader isn't required to consume all components that are loaded by the prolog
+       * and it's possible that more VGPRs are written. This specific case is handled at the end
+       * of the prolog which waits for all pending VMEM loads if needed.
+       */
       args->ac.args[args->vs_inputs[i].arg_index].pending_vmem = true;
    }
 }
@@ -508,7 +508,9 @@ radv_begin_sqtt(struct radv_queue *queue)
       device->sqtt.start_cs[family] = NULL;
    }
 
-   cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
+   radv_init_cmd_stream(&cs, radv_queue_ring(queue));
+
+   cs.b = ws->cs_create(ws, cs.hw_ip, false);
    if (!cs.b)
       return false;
 
@@ -585,7 +587,9 @@ radv_end_sqtt(struct radv_queue *queue)
       device->sqtt.stop_cs[family] = NULL;
    }
 
-   cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
+   radv_init_cmd_stream(&cs, radv_queue_ring(queue));
+
+   cs.b = ws->cs_create(ws, cs.hw_ip, false);
    if (!cs.b)
       return false;
 
@@ -149,10 +149,16 @@ radv_vcn_write_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned
    struct radv_physical_device *pdev = radv_device_physical(device);
    struct rvcn_sq_var sq;
    struct radv_cmd_stream *cs = cmd_buffer->cs;
+   enum radv_video_write_memory_support support = radv_video_write_memory_supported(pdev);
 
-   if (!radv_video_write_memory_supported(pdev))
+   if (support == RADV_VIDEO_WRITE_MEMORY_SUPPORT_NONE)
       return;
 
+   if (support == RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS) {
+      fprintf(stderr, "radv: VCN WRITE_MEMORY requires PCIe atomics support. Expect issues "
+                      "if PCIe atomics are not enabled on current device.\n");
+   }
+
    bool separate_queue = pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED;
    if (cmd_buffer->qf == RADV_QUEUE_VIDEO_DEC && separate_queue && pdev->vid_dec_reg.data2) {
       radeon_check_space(device->ws, cs->b, 8);

@@ -819,6 +825,32 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons
    if (cap && !cap->valid)
       cap = NULL;
 
+   if (cap) {
+      pCapabilities->maxCodedExtent.width = cap->max_width;
+      pCapabilities->maxCodedExtent.height = cap->max_height;
+   } else {
+      switch (pVideoProfile->videoCodecOperation) {
+      case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
+         pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_TONGA) ? 2048 : 4096;
+         pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_TONGA) ? 1152 : 4096;
+         break;
+      case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
+         pCapabilities->maxCodedExtent.width =
+            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
+         pCapabilities->maxCodedExtent.height =
+            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
+         break;
+      case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
+         pCapabilities->maxCodedExtent.width =
+            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
+         pCapabilities->maxCodedExtent.height =
+            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
+         break;
+      default:
+         break;
+      }
+   }
+
    pCapabilities->flags = 0;
    pCapabilities->pictureAccessGranularity.width = VK_VIDEO_H264_MACROBLOCK_WIDTH;
    pCapabilities->pictureAccessGranularity.height = VK_VIDEO_H264_MACROBLOCK_HEIGHT;
@@ -1126,32 +1158,6 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons
       break;
    }
 
-   if (cap) {
-      pCapabilities->maxCodedExtent.width = cap->max_width;
-      pCapabilities->maxCodedExtent.height = cap->max_height;
-   } else {
-      switch (pVideoProfile->videoCodecOperation) {
-      case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
-         pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_TONGA) ? 2048 : 4096;
-         pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_TONGA) ? 1152 : 4096;
-         break;
-      case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
-         pCapabilities->maxCodedExtent.width =
-            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
-         pCapabilities->maxCodedExtent.height =
-            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
-         break;
-      case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
-         pCapabilities->maxCodedExtent.width =
-            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
-         pCapabilities->maxCodedExtent.height =
-            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
-         break;
-      default:
-         break;
-      }
-   }
-
    return VK_SUCCESS;
 }
 
@@ -1746,8 +1752,10 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct
    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
-   result.sps_max_dec_pic_buffering_minus1 =
-      sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
+   if (sps->pDecPicBufMgr) {
+      result.sps_max_dec_pic_buffering_minus1 =
+         sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
+   }
    result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
    result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
    result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
@@ -1870,8 +1878,7 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, struct v
    memset(&result, 0, sizeof(result));
 
    rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(probs_ptr);
-   if (std_pic_info->flags.segmentation_enabled) {
-
+   if (std_pic_info->flags.segmentation_enabled && std_pic_info->pSegmentation) {
       for (unsigned i = 0; i < 8; ++i) {
          prbs->seg.feature_data[i] = (uint16_t)std_pic_info->pSegmentation->FeatureData[i][0] |
                                      ((uint32_t)(std_pic_info->pSegmentation->FeatureData[i][1] & 0xff) << 16) |
@@ -1912,12 +1919,12 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, struct v
    result.frame_header_flags |=
       (std_pic_info->flags.refresh_frame_context << RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) &
       RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK;
-   if (std_pic_info->flags.segmentation_enabled) {
-      assert(std_pic_info->pSegmentation);
-      result.frame_header_flags |=
-         (std_pic_info->flags.segmentation_enabled << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) &
-         RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK;
 
+   result.frame_header_flags |=
+      (std_pic_info->flags.segmentation_enabled << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) &
+      RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK;
+
+   if (std_pic_info->flags.segmentation_enabled && std_pic_info->pSegmentation) {
       result.frame_header_flags |= (std_pic_info->pSegmentation->flags.segmentation_update_map
                                     << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) &
                                    RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK;
@@ -1930,13 +1937,16 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, struct v
                                        << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_SHIFT) &
                                       RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_DATA_MASK;
    }
-   result.frame_header_flags |= (std_pic_info->pLoopFilter->flags.loop_filter_delta_enabled
-                                 << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) &
-                                RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK;
 
-   result.frame_header_flags |= (std_pic_info->pLoopFilter->flags.loop_filter_delta_update
-                                 << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) &
-                                RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK;
+   if (std_pic_info->pLoopFilter) {
+      result.frame_header_flags |= (std_pic_info->pLoopFilter->flags.loop_filter_delta_enabled
+                                    << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) &
+                                   RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK;
+
+      result.frame_header_flags |= (std_pic_info->pLoopFilter->flags.loop_filter_delta_update
+                                    << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) &
+                                   RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK;
+   }
 
    result.frame_header_flags |=
      (std_pic_info->flags.UsePrevFrameMvs << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) &
@@ -1949,26 +1959,31 @@ get_vp9_msg(struct radv_device *device, struct radv_video_session *vid, struct v
    result.frame_context_idx = std_pic_info->frame_context_idx;
    result.reset_frame_context = std_pic_info->reset_frame_context;
 
-   result.filter_level = std_pic_info->pLoopFilter->loop_filter_level;
-   result.sharpness_level = std_pic_info->pLoopFilter->loop_filter_sharpness;
+   uint8_t loop_filter_level = 0;
 
-   int shifted = std_pic_info->pLoopFilter->loop_filter_level >= 32;
+   if (std_pic_info->pLoopFilter) {
+      loop_filter_level = std_pic_info->pLoopFilter->loop_filter_level;
+      result.filter_level = std_pic_info->pLoopFilter->loop_filter_level;
+      result.sharpness_level = std_pic_info->pLoopFilter->loop_filter_sharpness;
+   }
+
+   int shifted = loop_filter_level >= 32;
 
    for (int i = 0; i < (std_pic_info->flags.segmentation_enabled ? 8 : 1); i++) {
       const uint8_t seg_lvl_alt_l = 1;
       uint8_t lvl;
 
-      if (std_pic_info->flags.segmentation_enabled &&
+      if (std_pic_info->flags.segmentation_enabled && std_pic_info->pSegmentation &&
          std_pic_info->pSegmentation->FeatureEnabled[i] & (1 << seg_lvl_alt_l)) {
          lvl = std_pic_info->pSegmentation->FeatureData[i][seg_lvl_alt_l];
          if (!std_pic_info->pSegmentation->flags.segmentation_abs_or_delta_update)
-            lvl += std_pic_info->pLoopFilter->loop_filter_level;
+            lvl += loop_filter_level;
         lvl = CLAMP(lvl, 0, 63);
      } else {
-         lvl = std_pic_info->pLoopFilter->loop_filter_level;
+         lvl = loop_filter_level;
      }
 
-      if (std_pic_info->pLoopFilter->flags.loop_filter_delta_enabled) {
+      if (std_pic_info->pLoopFilter && std_pic_info->pLoopFilter->flags.loop_filter_delta_enabled) {
        result.lf_adj_level[i][0][0] = result.lf_adj_level[i][0][1] =
           CLAMP(lvl + (std_pic_info->pLoopFilter->loop_filter_ref_deltas[0] * (1 << shifted)), 0, 63);
        for (int j = 1; j < 4; j++) {
|
||||||
result.log2_tile_rows = std_pic_info->tile_rows_log2;
|
result.log2_tile_rows = std_pic_info->tile_rows_log2;
|
||||||
result.chroma_format = 1;
|
result.chroma_format = 1;
|
||||||
|
|
||||||
result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = (std_pic_info->pColorConfig->BitDepth - 8);
|
if (std_pic_info->pColorConfig)
|
||||||
|
result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = (std_pic_info->pColorConfig->BitDepth - 8);
|
||||||
result.vp9_frame_size = vp9_pic_info->uncompressedHeaderOffset;
|
result.vp9_frame_size = vp9_pic_info->uncompressedHeaderOffset;
|
||||||
|
|
||||||
result.compressed_header_size = vp9_pic_info->tilesOffset - vp9_pic_info->compressedHeaderOffset;
|
result.compressed_header_size = vp9_pic_info->tilesOffset - vp9_pic_info->compressedHeaderOffset;
|
||||||
|
|
@@ -2082,16 +2098,20 @@ get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct v
       (pi->flags.allow_high_precision_mv << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) &
       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK;
 
-   result.frame_header_flags |=
-      (seq_hdr->pColorConfig->flags.mono_chrome << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
-      RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
+   if (seq_hdr->pColorConfig) {
+      result.frame_header_flags |=
+         (seq_hdr->pColorConfig->flags.mono_chrome << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
+         RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
+   }
 
    result.frame_header_flags |= (pi->flags.skip_mode_present << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) &
                                 RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK;
 
-   result.frame_header_flags |=
-      (pi->pQuantization->flags.using_qmatrix << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
-      RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
+   if (pi->pQuantization) {
+      result.frame_header_flags |=
+         (pi->pQuantization->flags.using_qmatrix << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
+         RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
+   }
 
    result.frame_header_flags |=
      (seq_hdr->flags.enable_filter_intra << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) &
@@ -2135,13 +2155,15 @@ get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct v
       (pi->flags.force_integer_mv << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) &
       RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK;
 
-   result.frame_header_flags |=
-      (pi->pLoopFilter->flags.loop_filter_delta_enabled << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
-      RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
+   if (pi->pLoopFilter) {
+      result.frame_header_flags |=
+         (pi->pLoopFilter->flags.loop_filter_delta_enabled << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
+         RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
 
       result.frame_header_flags |=
         (pi->pLoopFilter->flags.loop_filter_delta_update << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) &
         RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK;
+   }
 
    result.frame_header_flags |= (pi->flags.delta_q_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) &
                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK;
@@ -2201,50 +2223,59 @@ get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct v
 
    result.sb_size = seq_hdr->flags.use_128x128_superblock;
    result.interp_filter = pi->interpolation_filter;
-   for (i = 0; i < 2; ++i)
-      result.filter_level[i] = pi->pLoopFilter->loop_filter_level[i];
-   result.filter_level_u = pi->pLoopFilter->loop_filter_level[2];
-   result.filter_level_v = pi->pLoopFilter->loop_filter_level[3];
-   result.sharpness_level = pi->pLoopFilter->loop_filter_sharpness;
-   for (i = 0; i < 8; ++i)
-      result.ref_deltas[i] = pi->pLoopFilter->loop_filter_ref_deltas[i];
-   for (i = 0; i < 2; ++i)
-      result.mode_deltas[i] = pi->pLoopFilter->loop_filter_mode_deltas[i];
-   result.base_qindex = pi->pQuantization->base_q_idx;
-   result.y_dc_delta_q = pi->pQuantization->DeltaQYDc;
-   result.u_dc_delta_q = pi->pQuantization->DeltaQUDc;
-   result.v_dc_delta_q = pi->pQuantization->DeltaQVDc;
-   result.u_ac_delta_q = pi->pQuantization->DeltaQUAc;
-   result.v_ac_delta_q = pi->pQuantization->DeltaQVAc;
 
-   if (pi->pQuantization->flags.using_qmatrix) {
-      result.qm_y = pi->pQuantization->qm_y | 0xf0;
-      result.qm_u = pi->pQuantization->qm_u | 0xf0;
-      result.qm_v = pi->pQuantization->qm_v | 0xf0;
-   } else {
-      result.qm_y = 0xff;
-      result.qm_u = 0xff;
-      result.qm_v = 0xff;
+   if (pi->pLoopFilter) {
+      for (i = 0; i < 2; ++i)
+         result.filter_level[i] = pi->pLoopFilter->loop_filter_level[i];
+      result.filter_level_u = pi->pLoopFilter->loop_filter_level[2];
+      result.filter_level_v = pi->pLoopFilter->loop_filter_level[3];
+      result.sharpness_level = pi->pLoopFilter->loop_filter_sharpness;
+      for (i = 0; i < 8; ++i)
+         result.ref_deltas[i] = pi->pLoopFilter->loop_filter_ref_deltas[i];
+      for (i = 0; i < 2; ++i)
+         result.mode_deltas[i] = pi->pLoopFilter->loop_filter_mode_deltas[i];
    }
 
+   result.qm_y = 0xff;
+   result.qm_u = 0xff;
+   result.qm_v = 0xff;
+
+   if (pi->pQuantization) {
+      result.base_qindex = pi->pQuantization->base_q_idx;
+      result.y_dc_delta_q = pi->pQuantization->DeltaQYDc;
+      result.u_dc_delta_q = pi->pQuantization->DeltaQUDc;
+      result.v_dc_delta_q = pi->pQuantization->DeltaQVDc;
+      result.u_ac_delta_q = pi->pQuantization->DeltaQUAc;
+      result.v_ac_delta_q = pi->pQuantization->DeltaQVAc;
+
+      if (pi->pQuantization->flags.using_qmatrix) {
+         result.qm_y = pi->pQuantization->qm_y | 0xf0;
+         result.qm_u = pi->pQuantization->qm_u | 0xf0;
+         result.qm_v = pi->pQuantization->qm_v | 0xf0;
+      }
+   }
+
    result.delta_q_res = (1 << pi->delta_q_res);
    result.delta_lf_res = (1 << pi->delta_lf_res);
-   result.tile_cols = pi->pTileInfo->TileCols;
-   result.tile_rows = pi->pTileInfo->TileRows;
 
    result.tx_mode = pi->TxMode;
    result.reference_mode = (pi->flags.reference_select == 1) ? 2 : 0;
-   result.chroma_format = seq_hdr->pColorConfig->flags.mono_chrome ? 0 : 1;
-   result.tile_size_bytes = pi->pTileInfo->tile_size_bytes_minus_1;
-   result.context_update_tile_id = pi->pTileInfo->context_update_tile_id;
 
-   for (i = 0; i < result.tile_cols; i++)
-      result.tile_col_start_sb[i] = pi->pTileInfo->pMiColStarts[i];
-   result.tile_col_start_sb[result.tile_cols] =
-      result.tile_col_start_sb[result.tile_cols - 1] + pi->pTileInfo->pWidthInSbsMinus1[result.tile_cols - 1] + 1;
-   for (i = 0; i < pi->pTileInfo->TileRows; i++)
-      result.tile_row_start_sb[i] = pi->pTileInfo->pMiRowStarts[i];
-   result.tile_row_start_sb[result.tile_rows] =
-      result.tile_row_start_sb[result.tile_rows - 1] + pi->pTileInfo->pHeightInSbsMinus1[result.tile_rows - 1] + 1;
+   if (pi->pTileInfo) {
+      result.tile_cols = pi->pTileInfo->TileCols;
+      result.tile_rows = pi->pTileInfo->TileRows;
+      result.tile_size_bytes = pi->pTileInfo->tile_size_bytes_minus_1;
+      result.context_update_tile_id = pi->pTileInfo->context_update_tile_id;
+
+      for (i = 0; i < result.tile_cols; i++)
+         result.tile_col_start_sb[i] = pi->pTileInfo->pMiColStarts[i];
+      result.tile_col_start_sb[result.tile_cols] =
+         result.tile_col_start_sb[result.tile_cols - 1] + pi->pTileInfo->pWidthInSbsMinus1[result.tile_cols - 1] + 1;
+      for (i = 0; i < pi->pTileInfo->TileRows; i++)
+         result.tile_row_start_sb[i] = pi->pTileInfo->pMiRowStarts[i];
+      result.tile_row_start_sb[result.tile_rows] =
+         result.tile_row_start_sb[result.tile_rows - 1] + pi->pTileInfo->pHeightInSbsMinus1[result.tile_rows - 1] + 1;
+   }
 
    result.max_width = seq_hdr->max_frame_width_minus_1 + 1;
    result.max_height = seq_hdr->max_frame_height_minus_1 + 1;
@@ -2294,24 +2325,26 @@ get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct v
          av1_pic_info->referenceNameSlotIndices[i] == -1 ? 0x7f : av1_pic_info->referenceNameSlotIndices[i];
    }
 
-   result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = seq_hdr->pColorConfig->BitDepth - 8;
-
-   int16_t *feature_data = (int16_t *)probs_ptr;
-   int fd_idx = 0;
-   for (i = 0; i < 8; ++i) {
-      result.feature_mask[i] = pi->pSegmentation->FeatureEnabled[i];
-      for (j = 0; j < 8; ++j) {
-         result.feature_data[i][j] = pi->pSegmentation->FeatureData[i][j];
-         feature_data[fd_idx++] = result.feature_data[i][j];
+   if (pi->pSegmentation) {
+      int16_t *feature_data = (int16_t *)probs_ptr;
+      int fd_idx = 0;
+      for (i = 0; i < 8; ++i) {
+         result.feature_mask[i] = pi->pSegmentation->FeatureEnabled[i];
+         for (j = 0; j < 8; ++j) {
+            result.feature_data[i][j] = pi->pSegmentation->FeatureData[i][j];
+            feature_data[fd_idx++] = result.feature_data[i][j];
+         }
       }
+      memcpy(((char *)probs_ptr + 128), result.feature_mask, 8);
    }
 
-   memcpy(((char *)probs_ptr + 128), result.feature_mask, 8);
+   if (pi->pCDEF) {
       result.cdef_damping = pi->pCDEF->cdef_damping_minus_3 + 3;
       result.cdef_bits = pi->pCDEF->cdef_bits;
       for (i = 0; i < 8; ++i) {
         result.cdef_strengths[i] = (pi->pCDEF->cdef_y_pri_strength[i] << 2) + pi->pCDEF->cdef_y_sec_strength[i];
         result.cdef_uv_strengths[i] = (pi->pCDEF->cdef_uv_pri_strength[i] << 2) + pi->pCDEF->cdef_uv_sec_strength[i];
+      }
    }
 
    if (pi->flags.UsesLr) {
@@ -2321,9 +2354,13 @@ get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct v
       }
    }
 
-   if (seq_hdr->pColorConfig->BitDepth > 8) {
-      result.p010_mode = 1;
-      result.msb_mode = 1;
+   if (seq_hdr->pColorConfig) {
+      result.chroma_format = seq_hdr->pColorConfig->flags.mono_chrome ? 0 : 1;
+      result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = seq_hdr->pColorConfig->BitDepth - 8;
+      if (seq_hdr->pColorConfig->BitDepth > 8) {
+         result.p010_mode = 1;
+         result.msb_mode = 1;
+      }
    }
 
    result.preskip_segid = 0;
@@ -2355,7 +2392,7 @@ get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct v
 
    rvcn_dec_film_grain_params_t *fg_params = &result.film_grain;
    fg_params->apply_grain = pi->flags.apply_grain;
-   if (fg_params->apply_grain) {
+   if (fg_params->apply_grain && pi->pFilmGrain) {
      rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)((char *)probs_ptr + 256);
      fg_params->random_seed = pi->pFilmGrain->grain_seed;
      fg_params->grain_scale_shift = pi->pFilmGrain->grain_scale_shift;
@@ -2401,10 +2438,12 @@ get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct v
    }
 
    result.uncompressed_header_size = 0;
-   for (i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; ++i) {
-      result.global_motion[i].wmtype = pi->pGlobalMotion->GmType[i];
-      for (j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; ++j)
-         result.global_motion[i].wmmat[j] = pi->pGlobalMotion->gm_params[i][j];
+   if (pi->pGlobalMotion) {
+      for (i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; ++i) {
+         result.global_motion[i].wmtype = pi->pGlobalMotion->GmType[i];
+         for (j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; ++j)
+            result.global_motion[i].wmmat[j] = pi->pGlobalMotion->gm_params[i][j];
+      }
    }
    for (i = 0; i < av1_pic_info->tileCount && i < 256; ++i) {
       result.tile_info[i].offset = av1_pic_info->pTileOffsets[i];
@@ -2671,8 +2710,8 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
       * It will not perform any actual writes to these dummy slots.
       */
      for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) {
-         dynamic_dpb_t2->dpbAddrHi[i] = addr;
-         dynamic_dpb_t2->dpbAddrLo[i] = addr >> 32;
+         dynamic_dpb_t2->dpbAddrLo[i] = addr;
+         dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
      }
    }
 
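Note: this hunk is a straight halves-swapped fix; the low dword of the dummy-slot address had been written to dpbAddrHi and the high dword to dpbAddrLo. The usual split, as a standalone sketch:

   #include <stdint.h>

   /* Minimal sketch: split a 64-bit GPU VA into the lo/hi dword pair that
    * the VCN dynamic-DPB descriptor expects. */
   static inline void split_va(uint64_t addr, uint32_t *lo, uint32_t *hi)
   {
      *lo = (uint32_t)addr;          /* low 32 bits  */
      *hi = (uint32_t)(addr >> 32);  /* high 32 bits */
   }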
@@ -2918,8 +2957,10 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str
    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
-   result.sps_max_dec_pic_buffering_minus1 =
-      sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
+   if (sps->pDecPicBufMgr) {
+      result.sps_max_dec_pic_buffering_minus1 =
+         sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
+   }
    result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
    result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
    result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
@@ -73,6 +73,19 @@ struct radv_video_session {
    bool session_initialized;
 };
 
+/**
+ * WRITE_MEMORY support in FW.
+ *
+ * none: Not supported at all. Old VCN FW and all UVD.
+ * pcie_atomics: Supported, relies on PCIe atomics.
+ * full: Supported, works also without PCIe atomics.
+ */
+enum radv_video_write_memory_support {
+   RADV_VIDEO_WRITE_MEMORY_SUPPORT_NONE = 0,
+   RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS,
+   RADV_VIDEO_WRITE_MEMORY_SUPPORT_FULL,
+};
+
 VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session, vk.base, VkVideoSessionKHR, VK_OBJECT_TYPE_VIDEO_SESSION_KHR)
 
 void radv_init_physical_device_decoder(struct radv_physical_device *pdev);

@@ -98,7 +111,7 @@ void radv_video_get_enc_dpb_image(struct radv_device *device, const struct VkVid
 bool radv_video_decode_vp9_supported(const struct radv_physical_device *pdev);
 bool radv_video_encode_av1_supported(const struct radv_physical_device *pdev);
 bool radv_video_encode_qp_map_supported(const struct radv_physical_device *pdev);
-bool radv_video_write_memory_supported(const struct radv_physical_device *pdev);
+enum radv_video_write_memory_support radv_video_write_memory_supported(const struct radv_physical_device *pdev);
 uint32_t radv_video_get_qp_map_texel_size(VkVideoCodecOperationFlagBitsKHR codec);
 bool radv_check_vcn_fw_version(const struct radv_physical_device *pdev, uint32_t dec, uint32_t enc, uint32_t rev);
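Note: widening the boolean to a three-level enum lets callers distinguish firmware where WRITE_MEMORY always works from firmware where it is only reliable with PCIe atomics enabled. A sketch of the intended caller shape (the enum and query function are from this change; the wrapper itself is hypothetical, mirroring the radv_vcn_write_memory() hunk earlier in the series):

   /* Hypothetical caller sketch. */
   static bool can_use_write_memory(const struct radv_physical_device *pdev)
   {
      enum radv_video_write_memory_support support =
         radv_video_write_memory_supported(pdev);

      if (support == RADV_VIDEO_WRITE_MEMORY_SUPPORT_NONE)
         return false;  /* old VCN firmware and all UVD */

      if (support == RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS) {
         /* usable, but only reliable when PCIe atomics are enabled */
      }

      return true;  /* ..._FULL needs no extra care */
   }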
@@ -41,7 +41,7 @@
 #define ENC_ALIGNMENT 256
 
 #define RENCODE_V5_FW_INTERFACE_MAJOR_VERSION 1
-#define RENCODE_V5_FW_INTERFACE_MINOR_VERSION 3
+#define RENCODE_V5_FW_INTERFACE_MINOR_VERSION 10
 
 #define RENCODE_V4_FW_INTERFACE_MAJOR_VERSION 1
 #define RENCODE_V4_FW_INTERFACE_MINOR_VERSION 11
@@ -67,31 +67,6 @@ radv_probe_video_encode(struct radv_physical_device *pdev)
    if (instance->debug_flags & RADV_DEBUG_NO_VIDEO)
       return;
 
-   if (pdev->info.vcn_ip_version >= VCN_5_0_0) {
-      pdev->video_encode_enabled = true;
-      return;
-   } else if (pdev->info.vcn_ip_version >= VCN_4_0_0) {
-      if (pdev->info.vcn_enc_major_version != RENCODE_V4_FW_INTERFACE_MAJOR_VERSION)
-         return;
-      if (pdev->info.vcn_enc_minor_version < RENCODE_V4_FW_INTERFACE_MINOR_VERSION)
-         return;
-   } else if (pdev->info.vcn_ip_version >= VCN_3_0_0) {
-      if (pdev->info.vcn_enc_major_version != RENCODE_V3_FW_INTERFACE_MAJOR_VERSION)
-         return;
-      if (pdev->info.vcn_enc_minor_version < RENCODE_V3_FW_INTERFACE_MINOR_VERSION)
-         return;
-   } else if (pdev->info.vcn_ip_version >= VCN_2_0_0) {
-      if (pdev->info.vcn_enc_major_version != RENCODE_V2_FW_INTERFACE_MAJOR_VERSION)
-         return;
-      if (pdev->info.vcn_enc_minor_version < RENCODE_V2_FW_INTERFACE_MINOR_VERSION)
-         return;
-   } else {
-      if (pdev->info.vcn_enc_major_version != RENCODE_FW_INTERFACE_MAJOR_VERSION)
-         return;
-      if (pdev->info.vcn_enc_minor_version < RENCODE_FW_INTERFACE_MINOR_VERSION)
-         return;
-   }
-
    /* WRITE_MEMORY is needed for SetEvent and is required to pass CTS */
    if (radv_video_write_memory_supported(pdev)) {
       pdev->video_encode_enabled = true;
@@ -495,10 +470,10 @@ radv_enc_session_init(struct radv_cmd_buffer *cmd_buffer, const struct VkVideoEn
    if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_3)
       RADEON_ENC_CS(vid->enc_session.slice_output_enabled);
    RADEON_ENC_CS(vid->enc_session.display_remote);
-   if (pdev->enc_hw_ver == RADV_VIDEO_ENC_HW_4) {
+   if (pdev->enc_hw_ver == RADV_VIDEO_ENC_HW_4)
       RADEON_ENC_CS(vid->enc_session.WA_flags);
+   if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4)
       RADEON_ENC_CS(0);
-   }
    RADEON_ENC_END();
 }
 
@@ -890,7 +865,6 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
    uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
    const struct VkVideoEncodeH264PictureInfoKHR *h264_picture_info =
       vk_find_struct_const(enc_info->pNext, VIDEO_ENCODE_H264_PICTURE_INFO_KHR);
-   int slice_count = h264_picture_info->naluSliceEntryCount;
    const StdVideoEncodeH264PictureInfo *pic = h264_picture_info->pStdPictureInfo;
    const StdVideoH264SequenceParameterSet *sps =
       vk_video_find_h264_enc_std_sps(cmd_buffer->video.params, pic->seq_parameter_set_id);
@@ -903,8 +877,6 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
    unsigned int cdw_filled = 0;
    unsigned int bits_copied = 0;
 
-   assert(slice_count <= 1);
-
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
    const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_cmd_stream *cs = cmd_buffer->cs;
@@ -1080,6 +1052,9 @@ radv_enc_hevc_st_ref_pic_set(struct radv_cmd_buffer *cmd_buffer, const StdVideoH
    unsigned int num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
    unsigned int index = num_short_term_ref_pic_sets;
 
+   if (!rps)
+      return 0;
+
    if (index != 0)
       radv_enc_code_fixed_bits(cmd_buffer, rps->flags.inter_ref_pic_set_prediction_flag, 0x1);
 
@@ -2275,6 +2250,7 @@ radv_enc_params_av1(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoK
       RADEON_ENC_CS(av1_picture_info->referenceNameSlotIndices[i]);
    RADEON_ENC_CS(slot_idx_0);
    RADEON_ENC_CS(slot_idx_1);
+   RADEON_ENC_CS(av1_picture_info->pStdPictureInfo->order_hint);
    RADEON_ENC_END();
 }
 
@@ -2792,7 +2768,7 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
    cmd_buffer->video.enc.total_task_size = 0;
 
    // task info
-   radv_enc_task_info(cmd_buffer, true);
+   radv_enc_task_info(cmd_buffer, feedback_query_va);
 
    if (vid->enc_need_begin) {
       begin(cmd_buffer, enc_info);
@@ -2861,7 +2837,8 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
 
    if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_2) {
       radv_vcn_sq_tail(cs, &cmd_buffer->video.sq);
-      radv_vcn_write_memory(cmd_buffer, feedback_query_va + RADV_ENC_FEEDBACK_STATUS_IDX * sizeof(uint32_t), 1);
+      if (feedback_query_va && radv_video_write_memory_supported(pdev) == RADV_VIDEO_WRITE_MEMORY_SUPPORT_FULL)
+         radv_vcn_write_memory(cmd_buffer, feedback_query_va + RADV_ENC_FEEDBACK_STATUS_IDX * sizeof(uint32_t), 1);
    }
 }
 
@@ -3166,6 +3143,36 @@ radv_video_patch_encode_session_parameters(struct radv_device *device, struct vk
       }
       break;
    case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: {
+      for (unsigned i = 0; i < params->h265_enc.h265_sps_count; i++) {
+         uint32_t pic_width_in_luma_samples =
+            params->h265_enc.h265_sps[i].base.pic_width_in_luma_samples;
+         uint32_t pic_height_in_luma_samples =
+            params->h265_enc.h265_sps[i].base.pic_height_in_luma_samples;
+         uint32_t aligned_pic_width = align(pic_width_in_luma_samples, 64);
+         uint32_t aligned_pic_height = align(pic_height_in_luma_samples, 16);
+
+         /* Override the unaligned pic_{width,height} and make up for it with conformance window
+          * cropping */
+         params->h265_enc.h265_sps[i].base.pic_width_in_luma_samples = aligned_pic_width;
+         params->h265_enc.h265_sps[i].base.pic_height_in_luma_samples = aligned_pic_height;
+
+         if (aligned_pic_width != pic_width_in_luma_samples ||
+             aligned_pic_height != pic_height_in_luma_samples) {
+            params->h265_enc.h265_sps[i].base.flags.conformance_window_flag = 1;
+            params->h265_enc.h265_sps[i].base.conf_win_right_offset +=
+               (aligned_pic_width - pic_width_in_luma_samples) / 2;
+            params->h265_enc.h265_sps[i].base.conf_win_bottom_offset +=
+               (aligned_pic_height - pic_height_in_luma_samples) / 2;
+         }
+
+         /* VCN supports only the following block sizes (resulting in 64x64 CTBs with any coding
+          * block size) */
+         params->h265_enc.h265_sps[i].base.log2_min_luma_coding_block_size_minus3 = 0;
+         params->h265_enc.h265_sps[i].base.log2_diff_max_min_luma_coding_block_size = 3;
+         params->h265_enc.h265_sps[i].base.log2_min_luma_transform_block_size_minus2 = 0;
+         params->h265_enc.h265_sps[i].base.log2_diff_max_min_luma_transform_block_size = 3;
+      }
+
       for (unsigned i = 0; i < params->h265_enc.h265_pps_count; i++) {
          /* cu_qp_delta needs to be enabled if rate control is enabled. VCN2 and newer can also enable
          * it with rate control disabled. Since we don't know what rate control will be used, we
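Worked example for the SPS override above, assuming a 1920x1080 session: the width is already a multiple of 64, the height is rounded up to 1088, and conf_win_bottom_offset grows by (1088 - 1080) / 2 = 4; the offset is expressed in units of two luma rows for 4:2:0 chroma, so the decoder crops the 8 padding rows back off. A quick check of the arithmetic:

   #include <assert.h>
   #include <stdint.h>

   static uint32_t align_u32(uint32_t v, uint32_t a)
   {
      return (v + a - 1) & ~(a - 1);
   }

   int main(void)
   {
      uint32_t w = 1920, h = 1080;
      assert(align_u32(w, 64) == 1920);        /* width already aligned  */
      assert(align_u32(h, 16) == 1088);        /* height padded by 8 rows */
      assert((align_u32(h, 16) - h) / 2 == 4); /* bottom crop offset      */
      return 0;
   }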
@@ -3268,6 +3275,14 @@ radv_GetEncodedVideoSessionParametersKHR(VkDevice device,
       assert(sps);
       char *data_ptr = pData ? (char *)pData + vps_size : NULL;
       vk_video_encode_h265_sps(sps, size_limit, &sps_size, data_ptr);
+
+      if (pFeedbackInfo) {
+         struct VkVideoEncodeH265SessionParametersFeedbackInfoKHR *h265_feedback_info =
+            vk_find_struct(pFeedbackInfo->pNext, VIDEO_ENCODE_H265_SESSION_PARAMETERS_FEEDBACK_INFO_KHR);
+         pFeedbackInfo->hasOverrides = VK_TRUE;
+         if (h265_feedback_info)
+            h265_feedback_info->hasStdSPSOverrides = VK_TRUE;
+      }
    }
    if (h265_get_info->writeStdPPS) {
       const StdVideoH265PictureParameterSet *pps = vk_video_find_h265_enc_std_pps(templ, h265_get_info->stdPPSId);
@@ -3421,17 +3436,20 @@ radv_video_encode_qp_map_supported(const struct radv_physical_device *pdev)
    return true;
 }
 
-bool
+enum radv_video_write_memory_support
 radv_video_write_memory_supported(const struct radv_physical_device *pdev)
 {
-   if (pdev->info.vcn_ip_version >= VCN_5_0_0)
-      return true;
-   else if (pdev->info.vcn_ip_version >= VCN_4_0_0)
-      return pdev->info.vcn_enc_minor_version >= 22;
-   else if (pdev->info.vcn_ip_version >= VCN_3_0_0)
-      return pdev->info.vcn_enc_minor_version >= 33;
-   else if (pdev->info.vcn_ip_version >= VCN_2_0_0)
-      return pdev->info.vcn_enc_minor_version >= 24;
-   else /* VCN 1 and UVD */
-      return false;
+   if (pdev->info.vcn_ip_version >= VCN_5_0_0) {
+      return RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS;
+   } else if (pdev->info.vcn_ip_version >= VCN_4_0_0) {
+      if (pdev->info.vcn_enc_minor_version >= 22)
+         return RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS;
+   } else if (pdev->info.vcn_ip_version >= VCN_3_0_0) {
+      if (pdev->info.vcn_enc_minor_version >= 33)
+         return RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS;
+   } else if (pdev->info.vcn_ip_version >= VCN_2_0_0) {
+      if (pdev->info.vcn_enc_minor_version >= 24)
+         return RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS;
+   }
+   return RADV_VIDEO_WRITE_MEMORY_SUPPORT_NONE;
 }
 
@@ -158,6 +158,11 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *gpu_i
                             gpu_info->family == CHIP_RAVEN2 || gpu_info->family == CHIP_RENOIR || gpu_info->gfx_level >= GFX10_3);
 
    gpu_info->has_gang_submit = true;
+   gpu_info->mesh_fast_launch_2 = gpu_info->gfx_level >= GFX11;
+   gpu_info->hs_offchip_workgroup_dw_size = gpu_info->family == CHIP_HAWAII ? 4096 : 8192;
+   gpu_info->has_ls_vgpr_init_bug = gpu_info->family == CHIP_VEGA10 || gpu_info->family == CHIP_RAVEN;
+   gpu_info->has_graphics = true;
+   gpu_info->ip[AMD_IP_GFX].num_queues = 1;
 
    gpu_info->gart_page_size = 4096;
 }
(File diff suppressed because it is too large.)
@@ -24,7 +24,8 @@ ail_initialize_linear(struct ail_layout *layout)
    layout->layer_stride_B = align64(
       (uint64_t)layout->linear_stride_B * layout->height_px, AIL_CACHELINE);
 
-   layout->size_B = layout->layer_stride_B * layout->depth_px;
+   layout->size_B =
+      layout->level_offsets_B[0] + (layout->layer_stride_B * layout->depth_px);
 }
 
 /*
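Quick numeric check of the new size formula, under assumed values (a 256 B/row linear layout, 64 rows, 2 layers, first level starting 4096 B into the buffer); the old formula would under-count by the level offset:

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      uint64_t layer_stride_B = 256u * 64u;  /* 16384 B, cacheline multiple */
      uint64_t level_offset0_B = 4096;       /* hypothetical base offset */
      uint64_t depth_px = 2;

      uint64_t size_B = level_offset0_B + layer_stride_B * depth_px;
      assert(size_B == 36864);  /* 4096 + 2 * 16384 */
      return 0;
   }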
@@ -341,6 +342,7 @@ ail_make_miptree(struct ail_layout *layout)
       assert(layout->linear_stride_B == 0 && "Invalid nonlinear layout");
       assert(layout->levels >= 1 && "Invalid dimensions");
       assert(layout->sample_count_sa >= 1 && "Invalid sample count");
+      assert(layout->level_offsets_B[0] == 0 && "Invalid offset");
    }
 
    assert(!(layout->writeable_image && layout->compressed) &&
@@ -133,6 +133,7 @@ agx_virtio_bo_bind(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops,
    memcpy(req->payload, ops, payload_size);
 
    int ret = vdrm_send_req(dev->vdrm, &req->hdr, false);
+   free(req);
    if (ret) {
      fprintf(stderr, "ASAHI_CCMD_GEM_BIND failed: %d\n", ret);
    }
@@ -992,28 +992,34 @@ hk_CmdEndRendering(VkCommandBuffer commandBuffer)
    }
 }
 
+static void
+hk_init_heap(const void *data) {
+   struct hk_cmd_buffer *cmd = (struct hk_cmd_buffer *) data;
+   struct hk_device *dev = hk_cmd_buffer_device(cmd);
+
+   perf_debug(cmd, "Allocating heap");
+
+   size_t size = 128 * 1024 * 1024;
+   dev->heap = agx_bo_create(&dev->dev, size, 0, 0, "Geometry heap");
+
+   /* The geometry state buffer is initialized here and then is treated by
+    * the CPU as rodata, even though the GPU uses it for scratch internally.
+    */
+   off_t off = dev->rodata.heap - dev->rodata.bo->va->addr;
+   struct agx_heap *map = agx_bo_map(dev->rodata.bo) + off;
+
+   *map = (struct agx_heap){
+      .base = dev->heap->va->addr,
+      .size = size,
+   };
+}
+
 static uint64_t
 hk_heap(struct hk_cmd_buffer *cmd)
 {
    struct hk_device *dev = hk_cmd_buffer_device(cmd);
 
-   if (unlikely(!dev->heap)) {
-      perf_debug(cmd, "Allocating heap");
-
-      size_t size = 128 * 1024 * 1024;
-      dev->heap = agx_bo_create(&dev->dev, size, 0, 0, "Geometry heap");
-
-      /* The geometry state buffer is initialized here and then is treated by
-       * the CPU as rodata, even though the GPU uses it for scratch internally.
-       */
-      off_t off = dev->rodata.heap - dev->rodata.bo->va->addr;
-      struct agx_heap *map = agx_bo_map(dev->rodata.bo) + off;
-
-      *map = (struct agx_heap){
-         .base = dev->heap->va->addr,
-         .size = size,
-      };
-   }
+   util_call_once_data(&dev->heap_init_once, hk_init_heap, cmd);
 
    /* We need to free all allocations after each command buffer execution */
    if (!cmd->uses_heap) {
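The hunk above replaces hk_heap()'s racy `if (!dev->heap)` lazy allocation with a once-flag, so two threads recording command buffers cannot both observe a NULL heap and allocate it twice. A minimal standalone sketch of the same pattern using C11 call_once (the struct names here are illustrative, not Mesa's; C11's call_once takes no data pointer, which is exactly the gap a helper like util_call_once_data() fills):

   #include <stdlib.h>
   #include <threads.h>

   struct heap { size_t size; };

   struct device {
      once_flag heap_once;   /* must start life as ONCE_FLAG_INIT */
      struct heap *heap;
   };

   /* C11 call_once passes no argument, so this sketch smuggles the device
    * through a global; util_call_once_data() forwards a pointer instead. */
   static struct device *init_target;

   static void init_heap(void)
   {
      init_target->heap = malloc(sizeof(struct heap));
      init_target->heap->size = 128 * 1024 * 1024;
   }

   static struct heap *get_heap(struct device *dev)
   {
      init_target = dev;
      call_once(&dev->heap_once, init_heap);   /* runs init_heap at most once */
      return dev->heap;
   }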
@@ -330,6 +330,7 @@ hk_GetDescriptorSetLayoutSupport(
    uint64_t non_variable_size = 0;
    uint32_t variable_stride = 0;
    uint32_t variable_count = 0;
+   bool variable_is_inline_uniform_block = false;
    uint8_t dynamic_buffer_count = 0;
 
    for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
@@ -362,6 +363,10 @@ hk_GetDescriptorSetLayoutSupport(
           */
          variable_count = MAX2(1, binding->descriptorCount);
          variable_stride = stride;
+
+         variable_is_inline_uniform_block =
+            binding->descriptorType ==
+            VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK;
       } else {
          /* Since we're aligning to the maximum and since this is just a
           * check for whether or not the max buffer size is big enough, we
@@ -393,12 +398,21 @@ hk_GetDescriptorSetLayoutSupport(
       switch (ext->sType) {
       case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT: {
          VkDescriptorSetVariableDescriptorCountLayoutSupport *vs = (void *)ext;
+         uint32_t max_var_count;
+
          if (variable_stride > 0) {
-            vs->maxVariableDescriptorCount =
+            max_var_count =
               (max_buffer_size - non_variable_size) / variable_stride;
          } else {
-            vs->maxVariableDescriptorCount = 0;
+            max_var_count = 0;
          }
+
+         if (variable_is_inline_uniform_block) {
+            max_var_count =
+               MIN2(max_var_count, HK_MAX_INLINE_UNIFORM_BLOCK_SIZE);
+         }
+
+         vs->maxVariableDescriptorCount = max_var_count;
          break;
       }
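Since an inline uniform block's variable descriptor count is measured in bytes rather than in descriptors, the count derived from the set-size budget must additionally be clamped to the maximum inline block size, which is what the new `variable_is_inline_uniform_block` path above does. A worked version of the arithmetic with illustrative numbers:

   #include <stdint.h>
   #include <stdio.h>

   #define MIN2(a, b) ((a) < (b) ? (a) : (b))

   int main(void)
   {
      uint64_t max_buffer_size   = 1u << 30;  /* illustrative set budget */
      uint64_t non_variable_size = 4096;      /* bytes used by fixed bindings */
      uint32_t variable_stride   = 1;         /* inline blocks: 1 byte per unit */
      uint32_t max_inline_block  = 1u << 16;  /* cf. HK_MAX_INLINE_UNIFORM_BLOCK_SIZE */

      uint32_t max_var_count =
         (max_buffer_size - non_variable_size) / variable_stride;

      /* Without the clamp this would advertise roughly 1 GiB of inline
       * uniform space, far beyond what maxInlineUniformBlockSize allows. */
      max_var_count = MIN2(max_var_count, max_inline_block);

      printf("maxVariableDescriptorCount = %u\n", max_var_count);
      return 0;
   }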
@@ -92,6 +92,7 @@ struct hk_device {
     * expected to be a legitimate problem. If it is, we can rework later.
     */
    struct agx_bo *heap;
+   util_once_flag heap_init_once;
 
    struct {
       struct agx_scratch vs, fs, cs;
@@ -67,7 +67,7 @@ get_drm_format_modifier_properties_list(
    {
       *out_props = (VkDrmFormatModifierPropertiesEXT){
          .drmFormatModifier = mod,
-         .drmFormatModifierPlaneCount = 1 /* no planar mods */,
+         .drmFormatModifierPlaneCount = vk_format_get_plane_count(vk_format),
          .drmFormatModifierTilingFeatures = flags,
       };
    };
@@ -96,7 +96,7 @@ get_drm_format_modifier_properties_list_2(
    {
       *out_props = (VkDrmFormatModifierProperties2EXT){
          .drmFormatModifier = mod,
-         .drmFormatModifierPlaneCount = 1, /* no planar mods */
+         .drmFormatModifierPlaneCount = vk_format_get_plane_count(vk_format),
          .drmFormatModifierTilingFeatures = flags,
       };
    };
@@ -1424,6 +1424,13 @@ hk_copy_memory_to_image(struct hk_device *device, struct hk_image *dst_image,
    uint32_t src_height = info->memoryImageHeight ?: extent.height;
 
    uint32_t blocksize_B = util_format_get_blocksize(layout->format);
+
+   /* Align width and height to block */
+   src_width =
+      DIV_ROUND_UP(src_width, util_format_get_blockwidth(layout->format));
+   src_height =
+      DIV_ROUND_UP(src_height, util_format_get_blockheight(layout->format));
+
    uint32_t src_pitch = src_width * blocksize_B;
 
    unsigned start_layer = (dst_image->vk.image_type == VK_IMAGE_TYPE_3D)
@@ -1496,6 +1503,13 @@ hk_copy_image_to_memory(struct hk_device *device, struct hk_image *src_image,
 #endif
 
    uint32_t blocksize_B = util_format_get_blocksize(layout->format);
+
+   /* Align width and height to block */
+   dst_width =
+      DIV_ROUND_UP(dst_width, util_format_get_blockwidth(layout->format));
+   dst_height =
+      DIV_ROUND_UP(dst_height, util_format_get_blockheight(layout->format));
+
    uint32_t dst_pitch = dst_width * blocksize_B;
 
    unsigned start_layer = (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
@@ -1649,11 +1663,6 @@ hk_copy_image_to_image_cpu(struct hk_device *device, struct hk_image *src_image,
                                  &device->physical_device->ubwc_config);
 #endif
    } else {
-      /* Work tile-by-tile, holding the unswizzled tile in a temporary
-       * buffer.
-       */
-      char temp_tile[16384];
-
       unsigned src_level = info->srcSubresource.mipLevel;
       unsigned dst_level = info->dstSubresource.mipLevel;
       uint32_t block_width = src_layout->tilesize_el[src_level].width_el;
@@ -1667,6 +1676,12 @@ hk_copy_image_to_image_cpu(struct hk_device *device, struct hk_image *src_image,
       }
 
       uint32_t temp_pitch = block_width * src_block_B;
+      size_t temp_tile_size = temp_pitch * (src_offset.y + extent.height);
+
+      /* Work tile-by-tile, holding the unswizzled tile in a temporary
+       * buffer.
+       */
+      char *temp_tile = malloc(temp_tile_size);
 
       for (unsigned by = src_offset.y / block_height;
            by * block_height < src_offset.y + extent.height; by++) {
@@ -1683,14 +1698,14 @@ hk_copy_image_to_image_cpu(struct hk_device *device, struct hk_image *src_image,
                MIN2((bx + 1) * block_width, src_offset.x + extent.width) -
                src_x_start;
 
-            assert(height * temp_pitch <= ARRAY_SIZE(temp_tile));
-
             ail_detile((void *)src, temp_tile, src_layout, src_level,
                        temp_pitch, src_x_start, src_y_start, width, height);
             ail_tile(dst, temp_tile, dst_layout, dst_level, temp_pitch,
                      dst_x_start, dst_y_start, width, height);
          }
       }
+
+      free(temp_tile);
    }
 }
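These copy hunks convert texel extents into block extents before computing the pitch: for block-compressed formats the pitch is bytes per row of blocks, so the width and height must be divided by the block dimensions, rounding up for partial blocks. A standalone example with BC1-style 4x4 blocks of 8 bytes (the numbers are illustrative only):

   #include <stdint.h>
   #include <stdio.h>

   #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

   int main(void)
   {
      /* A 37x13 texel region of a 4x4-block format, 8 bytes per block. */
      uint32_t width_px = 37, height_px = 13;
      uint32_t block_w = 4, block_h = 4, block_B = 8;

      uint32_t width_bl  = DIV_ROUND_UP(width_px, block_w);   /* 10 blocks */
      uint32_t height_bl = DIV_ROUND_UP(height_px, block_h);  /*  4 rows   */
      uint32_t pitch_B   = width_bl * block_B;                /* 80 bytes  */

      printf("%u x %u blocks, pitch %u B, total %u B\n",
             width_bl, height_bl, pitch_B, pitch_B * height_bl);
      return 0;
   }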
@@ -859,7 +859,7 @@ hk_get_device_properties(const struct agx_device *dev,
       .maxSubgroupSize = 32,
       .maxComputeWorkgroupSubgroups = 1024 / 32,
       .requiredSubgroupSizeStages = 0,
-      .maxInlineUniformBlockSize = 1 << 16,
+      .maxInlineUniformBlockSize = HK_MAX_INLINE_UNIFORM_BLOCK_SIZE,
       .maxPerStageDescriptorInlineUniformBlocks = 32,
       .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
       .maxDescriptorSetInlineUniformBlocks = 6 * 32,
@@ -953,7 +953,7 @@ hk_get_device_properties(const struct agx_device *dev,
       .robustUniformBufferAccessSizeAlignment = HK_MIN_UBO_ALIGNMENT,
 
       /* VK_EXT_sample_locations */
-      .sampleLocationSampleCounts = sample_counts,
+      .sampleLocationSampleCounts = sample_counts & ~VK_SAMPLE_COUNT_1_BIT,
       .maxSampleLocationGridSize = (VkExtent2D){1, 1},
       .sampleLocationCoordinateRange[0] = 0.0f,
       .sampleLocationCoordinateRange[1] = 0.9375f,
@@ -12,18 +12,19 @@
 #include "vk_log.h"
 #include "vk_util.h"
 
 #define HK_MAX_SETS 8
 #define HK_MAX_PUSH_SIZE 256
 #define HK_MAX_DYNAMIC_BUFFERS 64
 #define HK_MAX_RTS 8
 #define HK_MIN_SSBO_ALIGNMENT 16
 #define HK_MIN_TEXEL_BUFFER_ALIGNMENT 16
 #define HK_MIN_UBO_ALIGNMENT 64
 #define HK_MAX_VIEWPORTS 16
 #define HK_MAX_DESCRIPTOR_SIZE 64
 #define HK_MAX_PUSH_DESCRIPTORS 32
 #define HK_MAX_DESCRIPTOR_SET_SIZE (1u << 30)
+#define HK_MAX_INLINE_UNIFORM_BLOCK_SIZE (1u << 16)
 #define HK_MAX_DESCRIPTORS (1 << 20)
 #define HK_PUSH_DESCRIPTOR_SET_SIZE \
    (HK_MAX_PUSH_DESCRIPTORS * HK_MAX_DESCRIPTOR_SIZE)
 #define HK_SSBO_BOUNDS_CHECK_ALIGNMENT 4
@@ -812,11 +812,6 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
    /* Now setup the command structs */
    struct util_dynarray payload;
    util_dynarray_init(&payload, NULL);
-   union drm_asahi_cmd *cmds = malloc(sizeof(*cmds) * command_count);
-   if (cmds == NULL) {
-      free(cmds);
-      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
-   }
 
    unsigned nr_vdm = 0, nr_cdm = 0;
@@ -1582,9 +1582,7 @@ enumerate_devices(struct vk_instance *vk_instance)
       break;
    }
 
-   assert(primary_fd >= 0);
-
-   if (render_fd < 0)
+   if (render_fd < 0 || primary_fd < 0)
       result = VK_ERROR_INCOMPATIBLE_DRIVER;
    else
       result = create_physical_device(instance, primary_fd, render_fd, display_fd);
@@ -46,12 +46,13 @@ impl_thrd_routine(void *p)
 
 /*--------------- 7.25.2 Initialization functions ---------------*/
 // 7.25.2.1
+#ifndef __once_flag_defined
 void
 call_once(once_flag *flag, void (*func)(void))
 {
    pthread_once(flag, func);
 }
+#endif
 
 /*------------- 7.25.3 Condition variable functions -------------*/
 // 7.25.3.1
@@ -118,8 +118,10 @@ typedef pthread_cond_t cnd_t;
 typedef pthread_t thrd_t;
 typedef pthread_key_t tss_t;
 typedef pthread_mutex_t mtx_t;
+#ifndef __once_flag_defined
 typedef pthread_once_t once_flag;
 # define ONCE_FLAG_INIT PTHREAD_ONCE_INIT
+#endif
 # ifdef PTHREAD_DESTRUCTOR_ITERATIONS
 #  define TSS_DTOR_ITERATIONS PTHREAD_DESTRUCTOR_ITERATIONS
 # else
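The `__once_flag_defined` guards above let these pthread-based shims coexist with a platform that already ships C11 once-flag support: when the marker macro is defined, both the `once_flag` typedef and the `call_once` definition drop out. A minimal sketch of the guarded fallback (detection of the native header is assumed to happen elsewhere; the shim is written as static inline here only to keep the sketch self-contained):

   /* threads_compat.h, illustrative fallback for call_once */
   #include <pthread.h>

   #ifndef __once_flag_defined
   typedef pthread_once_t once_flag;
   #define ONCE_FLAG_INIT PTHREAD_ONCE_INIT

   static inline void call_once(once_flag *flag, void (*func)(void))
   {
      /* pthread_once() guarantees func runs exactly once per flag,
       * even with concurrent callers. */
      pthread_once(flag, func);
   }
   #endif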
@@ -139,6 +139,7 @@ struct link_uniform_block_active {
    bool has_instance_name;
    bool has_binding;
    bool is_shader_storage;
+   bool block_index_assigned;
 };
 
 /*
@@ -1197,14 +1198,32 @@ link_linked_shader_uniform_blocks(void *mem_ctx,
 
    if (!prog->data->spirv) {
-      hash_table_foreach(block_hash, entry) {
+      /* Assign block indices in the order they appear in the shader. We could
+       * just loop over the hash table and this would be spec compliant
+       * however some games seem to incorrectly assume they know the correct
+       * index without checking. So to avoid debugging strange issues anytime
+       * the hash table is modified and the order changes we use this
+       * predictable index allocation instead.
+       */
+      nir_foreach_variable_in_shader(var, shader->Program->nir) {
+         if (block_type == BLOCK_UBO && !nir_variable_is_in_ubo(var))
+            continue;
+
+         if (block_type == BLOCK_SSBO && !nir_variable_is_in_ssbo(var))
+            continue;
+
+         const struct hash_entry *entry =
+            _mesa_hash_table_search(block_hash,
+                                    glsl_get_type_name(var->interface_type));
+
          struct link_uniform_block_active *const b =
             (struct link_uniform_block_active *) entry->data;
+         if (b->block_index_assigned)
+            continue;
+
          const struct glsl_type *blk_type =
             glsl_without_array(b->var->type) == b->var->interface_type ?
             b->var->type : b->var->interface_type;
 
          if (glsl_type_is_array(blk_type)) {
            char *name =
               ralloc_strdup(NULL,
@@ -1221,6 +1240,7 @@ link_linked_shader_uniform_blocks(void *mem_ctx,
                              variables, &variable_index, 0, 0, prog, shader->Stage,
                              block_type);
          }
+         b->block_index_assigned = true;
       }
    } else {
       nir_foreach_variable_in_shader(var, shader->Program->nir) {
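The comment in the hunk above is the key point: hash-table iteration order is an implementation detail, so handing out block indices in that order makes them change whenever the table's internals do. Walking the shader's variables in declaration order and merely looking each one up in the table yields stable indices. A toy standalone version of that idea (generic C, not the linker's types):

   #include <stdio.h>
   #include <string.h>

   struct block { const char *name; int index; };

   int main(void)
   {
      /* Pretend hash-table storage: this order is arbitrary. */
      struct block table[] = {
         { "Materials", -1 }, { "Globals", -1 }, { "Lights", -1 },
      };
      /* Canonical order: as the blocks appear in the shader source. */
      const char *decl_order[] = { "Globals", "Lights", "Materials" };

      int next_index = 0;
      for (int i = 0; i < 3; i++) {
         for (int j = 0; j < 3; j++) {   /* stand-in for the hash lookup */
            if (!strcmp(table[j].name, decl_order[i]) && table[j].index < 0)
               table[j].index = next_index++;   /* assigned at most once */
         }
      }

      for (int j = 0; j < 3; j++)
         printf("%s -> %d\n", table[j].name, table[j].index);
      return 0;
   }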
@@ -28,10 +28,16 @@ glcpp_lex = custom_target(
   command : [prog_flex, '-o', '@OUTPUT@', '@INPUT@'],
 )
 
+glcpp_header_gen_deps = declare_dependency(
+  sources : [
+    glcpp_parse[1],
+  ],
+)
+
 libglcpp = static_library(
   'glcpp',
   [glcpp_lex, glcpp_parse, files('glcpp.h', 'pp.c')],
-  dependencies : idep_mesautil,
+  dependencies : [idep_mesautil, glcpp_header_gen_deps],
   include_directories : [inc_include, inc_src, inc_mesa, inc_gallium, inc_gallium_aux],
   c_args : [no_override_init_args, c_msvc_compat_args],
   cpp_args : [cpp_msvc_compat_args],
@@ -224,6 +224,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_subgroup_id_shift_ir3:
    case nir_intrinsic_load_base_instance:
    case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_raw_vertex_offset_pan:
    case nir_intrinsic_load_first_vertex:
    case nir_intrinsic_load_draw_id:
    case nir_intrinsic_load_is_indexed_draw:
@@ -319,14 +320,10 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_base_global_invocation_id:
    case nir_intrinsic_load_base_workgroup_id:
    case nir_intrinsic_load_alpha_reference_amd:
-   case nir_intrinsic_load_ubo_uniform_block_intel:
-   case nir_intrinsic_load_ssbo_uniform_block_intel:
-   case nir_intrinsic_load_shared_uniform_block_intel:
    case nir_intrinsic_load_barycentric_optimize_amd:
    case nir_intrinsic_load_poly_line_smooth_enabled:
    case nir_intrinsic_load_rasterization_primitive_amd:
    case nir_intrinsic_unit_test_uniform_amd:
-   case nir_intrinsic_load_global_constant_uniform_block_intel:
    case nir_intrinsic_load_debug_log_desc_amd:
    case nir_intrinsic_load_xfb_state_address_gfx12_amd:
    case nir_intrinsic_cmat_length:
@@ -364,6 +361,24 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
       is_divergent = false;
       break;
 
+   case nir_intrinsic_load_ubo_uniform_block_intel:
+   case nir_intrinsic_load_ssbo_uniform_block_intel:
+   case nir_intrinsic_load_shared_uniform_block_intel:
+   case nir_intrinsic_load_global_constant_uniform_block_intel:
+      if (options & (nir_divergence_across_subgroups |
+                     nir_divergence_multiple_workgroup_per_compute_subgroup)) {
+         unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+         for (unsigned i = 0; i < num_srcs; i++) {
+            if (src_divergent(instr->src[i], state)) {
+               is_divergent = true;
+               break;
+            }
+         }
+      } else {
+         is_divergent = false;
+      }
+      break;
+
    /* This is divergent because it specifically loads sequential values into
    * successive SIMD lanes.
    */
@@ -825,6 +840,7 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
    case nir_intrinsic_load_sample_pos_or_center:
    case nir_intrinsic_load_vertex_id_zero_base:
    case nir_intrinsic_load_vertex_id:
+   case nir_intrinsic_load_raw_vertex_id_pan:
    case nir_intrinsic_load_invocation_id:
    case nir_intrinsic_load_local_invocation_id:
    case nir_intrinsic_load_local_invocation_index:
@@ -1069,6 +1069,7 @@ nir_get_io_index_src_number(const nir_intrinsic_instr *instr)
    IMG_CASE(atomic):
    IMG_CASE(atomic_swap):
    IMG_CASE(size):
+   IMG_CASE(levels):
    IMG_CASE(samples):
    IMG_CASE(texel_address):
    IMG_CASE(samples_identical):
@@ -1228,8 +1228,16 @@ wrap_instr(nir_builder *b, nir_instr *instr, void *data)
 static bool
 wrap_instrs(nir_shader *shader, wrap_instr_callback callback)
 {
-   return nir_shader_instructions_pass(shader, wrap_instr,
+   bool progress = nir_shader_instructions_pass(shader, wrap_instr,
                                        nir_metadata_none, callback);
+   /* Wrapping jump instructions that are located inside ifs can break SSA
+    * invariants because the else block no longer dominates the merge block.
+    * Repair the SSA to make the validator happy again.
+    */
+   if (progress)
+      nir_repair_ssa(shader);
+
+   return progress;
 }
 
 static bool
@@ -4096,9 +4096,9 @@ distribute_src_mods = [
    (('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)),
    (('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))),
 
-   (('fneg', ('ffma(is_used_once)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
+   (('fneg', ('ffma(is_used_once,nsz)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
    (('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c)),
-   (('fneg', ('~fadd(is_used_once)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),
+   (('fneg', ('fadd(is_used_once,nsz)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),
 
    # Note that fmin <-> fmax. I don't think there is a way to distribute
    # fabs() into fmin or fmax.
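In the `distribute_src_mods` rules above, `ffma` and `fadd` gain the `nsz` (no signed zeros) requirement because distributing a negate is not exact for IEEE signed zeros: whenever the sum is exactly +0.0, negating the sum gives -0.0, while the sum of the negated operands gives +0.0. A quick standalone check in C, assuming IEEE-754 semantics:

   #include <math.h>
   #include <stdio.h>

   int main(void)
   {
      double a = 1.0, b = -1.0;

      double lhs = -(a + b);      /* -(+0.0) == -0.0 */
      double rhs = (-a) + (-b);   /* -1.0 + 1.0 == +0.0 */

      /* lhs == rhs compares equal, but the sign bit differs, and 1.0/x
       * exposes it as -inf versus +inf. */
      printf("signbit: lhs=%d rhs=%d\n", signbit(lhs) != 0, signbit(rhs) != 0);
      printf("1/lhs=%f 1/rhs=%f\n", 1.0 / lhs, 1.0 / rhs);
      return 0;
   }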
@@ -82,7 +82,9 @@ opt_shrink_store_instr(nir_builder *b, nir_intrinsic_instr *instr, bool shrink_i
 
    /* Trim the num_components stored according to the write mask. */
    unsigned write_mask = nir_intrinsic_write_mask(instr);
-   unsigned last_bit = util_last_bit(write_mask);
+   /* Don't trim down to an invalid number of components, though. */
+   unsigned last_bit = nir_round_up_components(util_last_bit(write_mask));
 
    if (last_bit < instr->num_components) {
       nir_def *def = nir_trim_vector(b, instr->src[0].ssa, last_bit);
       nir_src_rewrite(&instr->src[0], def);
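NIR only represents vectors of certain widths, so trimming a wide store down to, say, six live components must round back up to the next representable width rather than produce an invalid vec6. An illustrative reimplementation of that rounding, under the assumption that the valid widths are 1 through 5, 8, and 16 (which is what a helper like `nir_round_up_components()` has to enforce):

   #include <stdio.h>

   /* Round a component count up to a width NIR can represent.
    * Illustrative only; the set of valid widths is an assumption here. */
   static unsigned round_up_components(unsigned n)
   {
      if (n <= 5)
         return n;      /* vec1..vec5 are valid as-is */
      if (n <= 8)
         return 8;      /* 6..8 live components need a vec8 */
      return 16;        /* 9..16 live components need a vec16 */
   }

   int main(void)
   {
      /* A 16-wide store whose write mask covers only the low 7 lanes
       * still has to stay at least vec8. */
      unsigned last_bit = 7;   /* util_last_bit(0x7f) */
      printf("trim to %u components\n", round_up_components(last_bit));
      return 0;
   }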
@@ -652,6 +652,7 @@ nir_precompiled_build_variant(const nir_function *libfunc,
 
    assert(libfunc->workgroup_size[0] != 0 && "must set workgroup size");
 
+   b.shader->info.workgroup_size_variable = false;
    b.shader->info.workgroup_size[0] = libfunc->workgroup_size[0];
    b.shader->info.workgroup_size[1] = libfunc->workgroup_size[1];
    b.shader->info.workgroup_size[2] = libfunc->workgroup_size[2];
@@ -4791,22 +4791,30 @@ vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
    return &vec->def;
 }
 
+/*
+ * Creates a copy of `src`, reinterpreting it as `dest_type`.
+ */
 static struct vtn_ssa_value *
-vtn_composite_copy(struct vtn_builder *b, struct vtn_ssa_value *src)
+vtn_composite_copy_logical(struct vtn_builder *b, struct vtn_ssa_value *src, struct vtn_type* dest_type)
 {
    assert(!src->is_variable);
 
    struct vtn_ssa_value *dest = vtn_zalloc(b, struct vtn_ssa_value);
-   dest->type = src->type;
+   dest->type = glsl_get_bare_type(dest_type->type);
 
-   if (glsl_type_is_vector_or_scalar(src->type)) {
+   if (glsl_type_is_vector_or_scalar(dest_type->type)) {
       dest->def = src->def;
    } else {
-      unsigned elems = glsl_get_length(src->type);
+      unsigned elems = glsl_get_length(dest_type->type);
 
       dest->elems = vtn_alloc_array(b, struct vtn_ssa_value *, elems);
-      for (unsigned i = 0; i < elems; i++)
-         dest->elems[i] = vtn_composite_copy(b, src->elems[i]);
+      if (glsl_type_is_struct(dest_type->type) || glsl_type_is_interface(dest_type->type)) {
+         for (unsigned i = 0; i < elems; i++)
+            dest->elems[i] = vtn_composite_copy_logical(b, src->elems[i], dest_type->members[i]);
+      } else {
+         for (unsigned i = 0; i < elems; i++)
+            dest->elems[i] = vtn_composite_copy_logical(b, src->elems[i], dest_type->array_element);
+      }
    }
 
    return dest;
@@ -4814,13 +4822,14 @@
 
 static struct vtn_ssa_value *
 vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src,
-                     struct vtn_ssa_value *insert, const uint32_t *indices,
-                     unsigned num_indices)
+                     struct vtn_type *src_type, struct vtn_ssa_value *insert,
+                     const uint32_t *indices, unsigned num_indices)
 {
    if (glsl_type_is_cmat(src->type))
       return vtn_cooperative_matrix_insert(b, src, insert, indices, num_indices);
 
-   struct vtn_ssa_value *dest = vtn_composite_copy(b, src);
+   /* Straight copy, use the source type as the destination type. */
+   struct vtn_ssa_value *dest = vtn_composite_copy_logical(b, src, src_type);
 
    struct vtn_ssa_value *cur = dest;
    unsigned i;
@@ -4963,15 +4972,15 @@ vtn_handle_composite(struct vtn_builder *b, SpvOp opcode,
 
    case SpvOpCompositeInsert:
       ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]),
+                                 vtn_get_value_type(b, w[4]),
                                  vtn_ssa_value(b, w[3]),
                                  w + 5, count - 5);
       break;
 
    case SpvOpCopyLogical: {
-      ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3]));
-      struct vtn_type *dst_type = vtn_get_value_type(b, w[2]);
-      vtn_assert(vtn_types_compatible(b, type, dst_type));
-      ssa->type = glsl_get_bare_type(dst_type->type);
+      struct vtn_type *dest_type = vtn_get_value_type(b, w[2]);
+      vtn_assert(vtn_types_compatible(b, vtn_get_value_type(b, w[3]), dest_type));
+      ssa = vtn_composite_copy_logical(b, vtn_ssa_value(b, w[3]), dest_type);
       break;
    }
    case SpvOpCopyObject:
@@ -506,8 +506,8 @@ vtn_pointer_dereference(struct vtn_builder *b,
             type = type->array_element;
          }
          tail = nir_build_deref_array(&b->nb, tail, arr_index);
+         tail->arr.in_bounds = deref_chain->in_bounds;
       }
-      tail->arr.in_bounds = deref_chain->in_bounds;
 
       access |= type->access;
    }
@@ -564,7 +564,7 @@ tiled_to_linear_2cpp(char *_tiled, char *_linear, uint32_t linear_pitch)
       "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15");
 }
 #else
-   memcpy_small<2, LINEAR_TO_TILED, FDL_MACROTILE_4_CHANNEL>(
+   memcpy_small<2, TILED_TO_LINEAR, FDL_MACROTILE_4_CHANNEL>(
       0, 0, 32, 4, _tiled, _linear, linear_pitch, 0, 0, 0);
 #endif
 }
@@ -2300,6 +2300,17 @@ insert_live_out_moves(struct ra_ctx *ctx)
    insert_file_live_out_moves(ctx, &ctx->shared);
 }
 
+static bool
+has_merge_set_preferred_reg(struct ir3_register *reg)
+{
+   assert(reg->merge_set);
+   assert(reg->num != INVALID_REG);
+
+   return reg->merge_set->preferred_reg != (physreg_t)~0 &&
+          ra_reg_get_physreg(reg) ==
+             reg->merge_set->preferred_reg + reg->merge_set_offset;
+}
+
 static void
 handle_block(struct ra_ctx *ctx, struct ir3_block *block)
 {
@@ -2338,17 +2349,15 @@ handle_block(struct ra_ctx *ctx, struct ir3_block *block)
       struct ir3_register *dst = input->dsts[0];
       assert(dst->num != INVALID_REG);
 
-      physreg_t dst_start = ra_reg_get_physreg(dst);
       physreg_t dst_end;
 
-      if (dst->merge_set) {
+      if (dst->merge_set && has_merge_set_preferred_reg(dst)) {
         /* Take the whole merge set into account to prevent its range being
          * allocated for defs not part of the merge set.
          */
-         assert(dst_start >= dst->merge_set_offset);
-         dst_end = dst_start - dst->merge_set_offset + dst->merge_set->size;
+         dst_end = dst->merge_set->preferred_reg + dst->merge_set->size;
       } else {
-         dst_end = dst_start + reg_size(dst);
+         dst_end = ra_reg_get_physreg(dst) + reg_size(dst);
       }
 
       struct ra_file *file = ra_get_file(ctx, dst);
@@ -1461,6 +1461,15 @@ r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
       gmem_offset = tu_attachment_gmem_offset(cmd, att, layer);
    }
 
+   /* On a7xx we must always use FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8. See
+    * blit_base_format().
+    */
+   if (CHIP >= A7XX && att->format == VK_FORMAT_D24_UNORM_S8_UINT) {
+      RB_MRT_BUF_INFO = pkt_field_set(A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT,
+                                      RB_MRT_BUF_INFO,
+                                      FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8);
+   }
+
    tu_cs_emit_regs(cs,
                    RB_MRT_BUF_INFO(CHIP, 0, .dword = RB_MRT_BUF_INFO),
                    A6XX_RB_MRT_PITCH(0, 0),
@@ -1533,7 +1542,8 @@ r3d_setup(struct tu_cmd_buffer *cmd,
       tu_cs_emit_call(cs, cmd->device->dbg_renderpass_stomp_cs);
    }
 
-   enum a6xx_format fmt = blit_base_format<CHIP>(dst_format, ubwc, false);
+   enum a6xx_format fmt = blit_base_format<CHIP>(dst_format, ubwc,
+                                                 blit_param & R3D_DST_GMEM);
    fixup_dst_format(src_format, &dst_format, &fmt);
 
    if (!cmd->state.pass) {
@@ -4638,7 +4648,7 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
    enum pipe_format format = vk_format_to_pipe_format(vk_format);
    const struct tu_framebuffer *fb = cmd->state.framebuffer;
    const struct tu_image_view *iview = cmd->state.attachments[a];
-   const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views;
+   const uint32_t clear_views = cmd->state.pass->attachments[a].used_views;
    const struct blit_ops *ops = &r2d_ops<CHIP>;
    const VkClearValue *value = &cmd->state.clear_values[a];
    if (cmd->state.pass->attachments[a].samples > 1)
@@ -4734,7 +4744,7 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
 
    tu_emit_clear_gmem_attachment<CHIP>(cmd, cs, resolve_group, a, 0,
                                        cmd->state.framebuffer->layers,
-                                       attachment->clear_views,
+                                       attachment->used_views,
                                        attachment->clear_mask,
                                        &cmd->state.clear_values[a], NULL);
 }
@@ -4755,7 +4765,7 @@ tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd,
                   iview->view.ubwc_enabled, att->samples);
 
    enum pipe_format format = vk_format_to_pipe_format(att->format);
-   for_each_layer(i, att->clear_views, cmd->state.framebuffer->layers) {
+   for_each_layer(i, att->used_views, cmd->state.framebuffer->layers) {
       uint32_t layer = i + 0;
      uint32_t mask =
         aspect_write_mask_generic_clear(format, att->clear_mask);
@@ -4836,7 +4846,7 @@ tu_emit_blit(struct tu_cmd_buffer *cmd,
    uint32_t buffer_id = tu_resolve_group_include_buffer<CHIP>(resolve_group, format);
    event_blit_setup(cs, buffer_id, attachment, blit_event_type, clear_mask);
 
-   for_each_layer(i, attachment->clear_views, cmd->state.framebuffer->layers) {
+   for_each_layer(i, attachment->used_views, cmd->state.framebuffer->layers) {
      event_blit_dst_view blt_view = blt_view_from_tu_view(iview, i);
      event_blit_run<CHIP>(cmd, cs, attachment, &blt_view, separate_stencil);
   }
@@ -4951,7 +4961,7 @@ load_3d_blit(struct tu_cmd_buffer *cmd,
    /* Wait for CACHE_INVALIDATE to land */
    tu_cs_emit_wfi(cs);
 
-   for_each_layer(i, att->clear_views, cmd->state.framebuffer->layers) {
+   for_each_layer(i, att->used_views, cmd->state.framebuffer->layers) {
      if (cmd->state.pass->has_fdm) {
         struct apply_load_coords_state state = {
            .view = i,
@@ -1616,7 +1616,7 @@ tu6_emit_gmem_stores(struct tu_cmd_buffer *cmd,
            scissor_emitted = true;
         }
         tu_store_gmem_attachment<CHIP>(cmd, cs, resolve_group, a, a,
-                                       fb->layers, subpass->multiview_mask,
+                                       fb->layers, att->used_views,
                                        cond_exec_allowed);
      }
   }
@@ -6868,7 +6868,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd,
   struct tu_render_pass_state *rp = &cmd->state.rp;
 
   trace_start_draw(
-      &cmd->trace, &cmd->draw_cs, cmd, draw_count,
+      &cmd->rp_trace, &cmd->draw_cs, cmd, draw_count,
      cmd->state.program.stage_sha1[MESA_SHADER_VERTEX],
      cmd->state.program.stage_sha1[MESA_SHADER_TESS_CTRL],
      cmd->state.program.stage_sha1[MESA_SHADER_TESS_EVAL],
@@ -7316,7 +7316,7 @@ tu_CmdDraw(VkCommandBuffer commandBuffer,
   tu_cs_emit(cs, instanceCount);
   tu_cs_emit(cs, vertexCount);
 
-   trace_end_draw(&cmd->trace, cs);
+   trace_end_draw(&cmd->rp_trace, cs);
 }
 TU_GENX(tu_CmdDraw);
 
@@ -7365,7 +7365,7 @@ tu_CmdDrawMultiEXT(VkCommandBuffer commandBuffer,
   }
 
   if (i != 0)
-      trace_end_draw(&cmd->trace, cs);
+      trace_end_draw(&cmd->rp_trace, cs);
 }
 TU_GENX(tu_CmdDrawMultiEXT);
 
@@ -7393,7 +7393,7 @@ tu_CmdDrawIndexed(VkCommandBuffer commandBuffer,
   tu_cs_emit_qw(cs, cmd->state.index_va);
   tu_cs_emit(cs, cmd->state.max_index_count);
 
-   trace_end_draw(&cmd->trace, cs);
+   trace_end_draw(&cmd->rp_trace, cs);
 }
 TU_GENX(tu_CmdDrawIndexed);
 
@@ -7447,7 +7447,7 @@ tu_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer,
   }
 
  if (i != 0)
-      trace_end_draw(&cmd->trace, cs);
+      trace_end_draw(&cmd->rp_trace, cs);
 }
 TU_GENX(tu_CmdDrawMultiIndexedEXT);
 
@@ -7492,7 +7492,7 @@ tu_CmdDrawIndirect(VkCommandBuffer commandBuffer,
   tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
   tu_cs_emit(cs, stride);
 
-   trace_end_draw(&cmd->trace, cs);
+   trace_end_draw(&cmd->rp_trace, cs);
 }
 TU_GENX(tu_CmdDrawIndirect);
 
@@ -7525,7 +7525,7 @@ tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
   tu_cs_emit_qw(cs, vk_buffer_address(&buf->vk, offset));
   tu_cs_emit(cs, stride);
 
-   trace_end_draw(&cmd->trace, cs);
+   trace_end_draw(&cmd->rp_trace, cs);
 }
 TU_GENX(tu_CmdDrawIndexedIndirect);
 
@@ -7564,7 +7564,7 @@ tu_CmdDrawIndirectCount(VkCommandBuffer commandBuffer,
   tu_cs_emit_qw(cs, vk_buffer_address(&count_buf->vk, countBufferOffset));
   tu_cs_emit(cs, stride);
 
-   trace_end_draw(&cmd->trace, cs);
+   trace_end_draw(&cmd->rp_trace, cs);
 }
 TU_GENX(tu_CmdDrawIndirectCount);
 
@@ -7600,7 +7600,7 @@ tu_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer,
   tu_cs_emit_qw(cs, vk_buffer_address(&count_buf->vk, countBufferOffset));
   tu_cs_emit(cs, stride);
 
-   trace_end_draw(&cmd->trace, cs);
+   trace_end_draw(&cmd->rp_trace, cs);
 }
 TU_GENX(tu_CmdDrawIndexedIndirectCount);
 
@@ -7644,7 +7644,7 @@ tu_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer,
   tu_cs_emit(cs, counterOffset);
   tu_cs_emit(cs, vertexStride);
 
-   trace_end_draw(&cmd->trace, cs);
+   trace_end_draw(&cmd->rp_trace, cs);
 }
 TU_GENX(tu_CmdDrawIndirectByteCountEXT);
@@ -208,8 +208,8 @@ tu_CreateDescriptorSetLayout(
      if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
         set_layout->has_inline_uniforms = true;
 
-      if (variable_flags && binding->binding < variable_flags->bindingCount &&
-          (variable_flags->pBindingFlags[binding->binding] &
+      if (variable_flags && j < variable_flags->bindingCount &&
+          (variable_flags->pBindingFlags[j] &
           VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
        assert(!binding->pImmutableSamplers); /* Terribly ill defined how
                                                 many samplers are valid */
@@ -377,7 +377,7 @@ tu_GetDescriptorSetLayoutSupport(
      uint64_t max_count = MAX_SET_SIZE;
      unsigned descriptor_count = binding->descriptorCount;
      if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
-         max_count = MAX_SET_SIZE - size;
+         max_count = MAX_INLINE_UBO_RANGE - size;
        descriptor_count = descriptor_sz;
        descriptor_sz = 1;
      } else if (descriptor_sz) {
@@ -388,9 +388,9 @@ tu_GetDescriptorSetLayoutSupport(
        supported = false;
      }
 
-      if (variable_flags && binding->binding < variable_flags->bindingCount &&
+      if (variable_flags && i < variable_flags->bindingCount &&
          variable_count &&
-          (variable_flags->pBindingFlags[binding->binding] &
+          (variable_flags->pBindingFlags[i] &
          VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
        variable_count->maxVariableDescriptorCount =
           MIN2(UINT32_MAX, max_count);
@@ -417,7 +417,8 @@ tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
         uint32_t a = subpass->input_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
-         subpass->input_attachments[j].patch_input_gmem = written[a];
+         subpass->input_attachments[j].patch_input_gmem =
+            written[a] && pass->attachments[a].gmem;
      }
 
      for (unsigned j = 0; j < subpass->color_count; j++) {
@@ -884,7 +885,7 @@ tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const
 
   att->gmem = true;
   update_samples(subpass, pCreateInfo->pAttachments[a].samples);
-   att->clear_views |= subpass->multiview_mask;
+   att->used_views |= subpass->multiview_mask;
 
   /* Loads and clears are emitted at the start of the subpass that needs them. */
   att->first_subpass_idx = MIN2(i, att->first_subpass_idx);
@@ -1126,6 +1127,7 @@ tu_CreateRenderPass2(VkDevice _device,
      if (!att->gmem) {
         att->clear_mask = 0;
         att->load = false;
+         att->load_stencil = false;
      }
   }
 
@@ -1235,7 +1237,7 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
      VK_FROM_HANDLE(tu_image_view, view, att_info->imageView);
      tu_setup_dynamic_attachment(att, view);
      att->gmem = true;
-      att->clear_views = info->viewMask;
+      att->used_views = info->viewMask;
      attachment_set_ops(device, att, att_info->loadOp,
                         VK_ATTACHMENT_LOAD_OP_DONT_CARE, att_info->storeOp,
                         VK_ATTACHMENT_STORE_OP_DONT_CARE);
@@ -1279,7 +1281,7 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
      struct tu_render_pass_attachment *att = &pass->attachments[a];
      tu_setup_dynamic_attachment(att, view);
      att->gmem = true;
-      att->clear_views = info->viewMask;
+      att->used_views = info->viewMask;
      subpass->depth_stencil_attachment.attachment = a++;
      subpass->input_attachments[0].attachment =
         subpass->depth_stencil_attachment.attachment;
@@ -94,7 +94,19 @@ struct tu_render_pass_attachment
   VkSampleCountFlagBits samples;
   uint32_t cpp;
   VkImageAspectFlags clear_mask;
-   uint32_t clear_views;
+   /* All views that are used with the attachment in all subpasses. Used to
+    * determine which views to apply loadOp/storeOp to.
+    */
+   uint32_t used_views;
+   /* The internal MSRTSS attachment to clear when the user says to clear
+    * this attachment. Clear values must be remapped to this attachment.
+    */
+   uint32_t remapped_clear_att;
+   /* For internal attachments created for MSRTSS, the original user attachment
+    * which it is resolved/unresolved to.
+    */
+   uint32_t user_att;
   bool load;
   bool store;
   bool gmem;
@@ -3157,8 +3157,6 @@ tu6_emit_blend(struct tu_cs *cs,
 
   bool dual_src_blend = tu_blend_state_is_dual_src(cb);
 
-   tu_cs_emit_regs(cs, A6XX_SP_PS_MRT_CNTL(.mrt = num_rts));
-   tu_cs_emit_regs(cs, A6XX_RB_PS_MRT_CNTL(.mrt = num_rts));
   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL(.enable_blend = blend_enable_mask,
                                          .unk8 = true,
                                          .dual_color_in_enable =
@@ -3180,10 +3178,12 @@ tu6_emit_blend(struct tu_cs *cs,
                                          .alpha_to_one = alpha_to_one_enable,
                                          .sample_mask = sample_mask));
 
+   unsigned num_remapped_rts = 0;
   for (unsigned i = 0; i < num_rts; i++) {
      if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
         continue;
      unsigned remapped_idx = cal->color_map[i];
+      num_remapped_rts = MAX2(num_remapped_rts, remapped_idx + 1);
      const struct vk_color_blend_attachment_state *att = &cb->attachments[i];
      if ((cb->color_write_enables & (1u << i)) && i < cb->attachment_count) {
         const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->color_blend_op);
@@ -3227,6 +3227,8 @@ tu6_emit_blend(struct tu_cs *cs,
                        A6XX_RB_MRT_BLEND_CONTROL(remapped_idx,));
      }
   }
+   tu_cs_emit_regs(cs, A6XX_SP_PS_MRT_CNTL(.mrt = num_remapped_rts));
+   tu_cs_emit_regs(cs, A6XX_RB_PS_MRT_CNTL(.mrt = num_remapped_rts));
 }
 
 static const enum mesa_vk_dynamic_graphics_state tu_blend_constants_state[] = {
@@ -185,12 +185,6 @@ static void noop_set_vertex_buffers(struct pipe_context *ctx,
                                    unsigned count,
                                    const struct pipe_vertex_buffer *buffers)
 {
-   for (unsigned i = 0; i < count; i++) {
-      if (!buffers[i].is_user_buffer) {
-         struct pipe_resource *buf = buffers[i].buffer.resource;
-         pipe_resource_reference(&buf, NULL);
-      }
-   }
 }
 
 static void *noop_create_vertex_elements(struct pipe_context *ctx,
@ -88,10 +88,9 @@
|
||||||
#define LLVMCreateBuilder ILLEGAL_LLVM_FUNCTION
|
#define LLVMCreateBuilder ILLEGAL_LLVM_FUNCTION
|
||||||
|
|
||||||
typedef struct lp_context_ref {
|
typedef struct lp_context_ref {
|
||||||
#if GALLIVM_USE_ORCJIT
|
|
||||||
LLVMOrcThreadSafeContextRef ref;
|
|
||||||
#else
|
|
||||||
LLVMContextRef ref;
|
LLVMContextRef ref;
|
||||||
|
#if GALLIVM_USE_ORCJIT
|
||||||
|
LLVMOrcThreadSafeContextRef tsref;
|
||||||
#endif
|
#endif
|
||||||
bool owned;
|
bool owned;
|
||||||
} lp_context_ref;
|
} lp_context_ref;
|
||||||
|
|
@ -101,18 +100,21 @@ lp_context_create(lp_context_ref *context)
|
||||||
{
|
{
|
||||||
assert(context != NULL);
|
assert(context != NULL);
|
||||||
#if GALLIVM_USE_ORCJIT
|
#if GALLIVM_USE_ORCJIT
|
||||||
context->ref = LLVMOrcCreateNewThreadSafeContext();
|
#if LLVM_VERSION_MAJOR >= 21
|
||||||
|
context->ref = LLVMContextCreate();
|
||||||
|
/* Ownership of ref is then transferred to tsref */
|
||||||
|
context->tsref = LLVMOrcCreateNewThreadSafeContextFromLLVMContext(context->ref);
|
||||||
|
#else
|
||||||
|
context->tsref = LLVMOrcCreateNewThreadSafeContext();
|
||||||
|
context->ref = LLVMOrcThreadSafeContextGetContext(context->tsref);
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
context->ref = LLVMContextCreate();
|
context->ref = LLVMContextCreate();
|
||||||
#endif
|
#endif
|
||||||
context->owned = true;
|
context->owned = true;
|
||||||
#if LLVM_VERSION_MAJOR == 15
|
#if LLVM_VERSION_MAJOR == 15
|
||||||
if (context->ref) {
|
if (context->ref) {
|
||||||
#if GALLIVM_USE_ORCJIT
|
|
||||||
LLVMContextSetOpaquePointers(LLVMOrcThreadSafeContextGetContext(context->ref), false);
|
|
||||||
#else
|
|
||||||
LLVMContextSetOpaquePointers(context->ref, false);
|
LLVMContextSetOpaquePointers(context->ref, false);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
@ -123,7 +125,7 @@ lp_context_destroy(lp_context_ref *context)
|
||||||
assert(context != NULL);
|
assert(context != NULL);
|
||||||
if (context->owned) {
|
if (context->owned) {
|
||||||
#if GALLIVM_USE_ORCJIT
|
#if GALLIVM_USE_ORCJIT
|
||||||
LLVMOrcDisposeThreadSafeContext(context->ref);
|
LLVMOrcDisposeThreadSafeContext(context->tsref);
|
||||||
#else
|
#else
|
||||||
LLVMContextDispose(context->ref);
|
LLVMContextDispose(context->ref);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@@ -555,8 +555,8 @@ init_gallivm_state(struct gallivm_state *gallivm, const char *name,

    gallivm->cache = cache;

-   gallivm->_ts_context = context->ref;
-   gallivm->context = LLVMContextCreate();
+   gallivm->_ts_context = context->tsref;
+   gallivm->context = context->ref;

    gallivm->module_name = LPJit::get_unique_name(name);
    gallivm->module = LLVMModuleCreateWithNameInContext(gallivm->module_name,

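
The gallivm hunks above all follow from one ownership change: lp_context_ref now carries the plain LLVMContextRef (ref) and the ORC wrapper (tsref) as separate fields, because with LLVM 21 the thread-safe context can no longer be asked for its inner context; instead an existing context is wrapped (and owned) via LLVMOrcCreateNewThreadSafeContextFromLLVMContext(). A minimal standalone sketch of the two creation paths, assuming the LLVM-C headers and an LLVM_VERSION_MAJOR define from the build system (error handling omitted):

/* Sketch: create and destroy a thread-safe LLVM context across the
 * LLVM 21 API break. */
#include <llvm-c/Core.h>
#include <llvm-c/Orc.h>

static void ts_context_demo(void)
{
   LLVMContextRef ctx;
   LLVMOrcThreadSafeContextRef tsctx;
#if LLVM_VERSION_MAJOR >= 21
   ctx = LLVMContextCreate();
   /* Ownership of ctx transfers to the ThreadSafeContext. */
   tsctx = LLVMOrcCreateNewThreadSafeContextFromLLVMContext(ctx);
#else
   tsctx = LLVMOrcCreateNewThreadSafeContext();
   /* Borrow the inner context; the wrapper still owns it. */
   ctx = LLVMOrcThreadSafeContextGetContext(tsctx);
#endif
   (void)ctx; /* modules would be created in ctx here */
   LLVMOrcDisposeThreadSafeContext(tsctx); /* frees the inner context too */
}
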
@@ -3163,7 +3163,7 @@ do_int_divide(struct lp_build_nir_soa_context *bld,

 static LLVMValueRef
 do_int_mod(struct lp_build_nir_soa_context *bld,
-           bool is_unsigned, unsigned src_bit_size,
+           bool is_unsigned, bool use_src2_sign, unsigned src_bit_size,
            LLVMValueRef src, LLVMValueRef src2)
 {
    struct gallivm_state *gallivm = bld->base.gallivm;

@@ -3180,8 +3180,18 @@ do_int_mod(struct lp_build_nir_soa_context *bld,
       divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                    src_bit_size, src, divisor);
    }
-   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
-   return LLVMBuildOr(builder, div_mask, result, "");
+   LLVMValueRef rem = lp_build_mod(int_bld, src, divisor);
+   rem = LLVMBuildOr(builder, div_mask, rem, "");
+
+   if (use_src2_sign) {
+      LLVMValueRef add_src2 = LLVMBuildICmp(builder, LLVMIntNE, rem, int_bld->zero, "");
+      LLVMValueRef signs_different = LLVMBuildXor(builder, LLVMBuildICmp(builder, LLVMIntSLT, src, int_bld->zero, ""),
+                                                  LLVMBuildICmp(builder, LLVMIntSLT, src2, int_bld->zero, ""), "");
+      add_src2 = LLVMBuildAnd(builder, add_src2, signs_different, "");
+      rem = LLVMBuildSelect(builder, add_src2, LLVMBuildAdd(builder, rem, src2, ""), rem, "");
+   }
+
+   return rem;
 }

 static LLVMValueRef

@@ -3493,7 +3503,7 @@ do_alu_action(struct lp_build_nir_soa_context *bld,
       break;
    case nir_op_imod:
    case nir_op_irem:
-      result = do_int_mod(bld, false, src_bit_size[0], src[0], src[1]);
+      result = do_int_mod(bld, false, instr->op == nir_op_imod, src_bit_size[0], src[0], src[1]);
       break;
    case nir_op_ishl: {
       if (src_bit_size[0] == 64)

@@ -3592,7 +3602,7 @@ do_alu_action(struct lp_build_nir_soa_context *bld,
       result = lp_build_min(uint_bld, src[0], src[1]);
       break;
    case nir_op_umod:
-      result = do_int_mod(bld, true, src_bit_size[0], src[0], src[1]);
+      result = do_int_mod(bld, true, false, src_bit_size[0], src[0], src[1]);
       break;
    case nir_op_umul_high: {
       LLVMValueRef hi_bits;

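
The do_int_mod() hunks distinguish NIR's two signed remainder opcodes: nir_op_irem keeps the sign of the dividend (which is what lp_build_mod() already produces), while nir_op_imod must keep the sign of the divisor, hence the new use_src2_sign path that conditionally adds src2 back in. A scalar C sketch of the same fixup:

/* Scalar sketch of the imod-vs-irem fixup the hunk above emits as LLVM IR.
 * irem keeps the sign of the dividend (C's % operator); imod keeps the
 * sign of the divisor. */
#include <stdio.h>

static int irem(int a, int b) { return a % b; }

static int imod(int a, int b)
{
   int r = a % b;
   /* If the remainder is nonzero and the operands disagree in sign,
    * shift it into the divisor's sign by adding b once. */
   if (r != 0 && ((a < 0) != (b < 0)))
      r += b;
   return r;
}

int main(void)
{
   printf("irem(-7, 3) = %d\n", irem(-7, 3)); /* -1 */
   printf("imod(-7, 3) = %d\n", imod(-7, 3)); /*  2 */
   return 0;
}
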
@@ -64,6 +64,7 @@ DRI_CONF_SECTION_END
 DRI_CONF_SECTION_MISCELLANEOUS
    DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER(false)
    DRI_CONF_GLSL_ZERO_INIT(false)
+   DRI_CONF_VERTEX_PROGRAM_DEFAULT_OUT(false)
    DRI_CONF_VS_POSITION_ALWAYS_INVARIANT(false)
    DRI_CONF_VS_POSITION_ALWAYS_PRECISE(false)
    DRI_CONF_ALLOW_RGB10_CONFIGS(true)

@@ -76,6 +76,7 @@ u_driconf_fill_st_options(struct st_config_options *options,
    query_string_option(force_gl_renderer);
    query_string_option(mesa_extension_override);
    query_bool_option(allow_multisampled_copyteximage);
+   query_bool_option(vertex_program_default_out);

    driComputeOptionsSha1(optionCache, options->config_options_sha1);
 }

@@ -634,8 +634,8 @@ asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc)
    assert(att->count < MAX_ATTACHMENTS);

    att->list[att->count++] = (struct drm_asahi_attachment){
-      .size = rsrc->layout.size_B,
-      .pointer = rsrc->bo->va->addr,
+      .size = rsrc->layout.size_B - rsrc->layout.level_offsets_B[0],
+      .pointer = agx_map_gpu(rsrc),
    };
 }

@@ -210,13 +210,13 @@ agx_resource_from_handle(struct pipe_screen *pscreen,

    if (rsc->layout.tiling == AIL_TILING_LINEAR) {
       rsc->layout.linear_stride_B = whandle->stride;
-   } else if (whandle->stride != ail_get_wsi_stride_B(&rsc->layout, 0)) {
+      rsc->layout.level_offsets_B[0] = whandle->offset;
+   } else if (whandle->stride != ail_get_wsi_stride_B(&rsc->layout, 0) ||
+              whandle->offset != 0) {
       FREE(rsc);
       return NULL;
    }

-   assert(whandle->offset == 0);
-
    ail_make_miptree(&rsc->layout);

    if (prsc->target == PIPE_BUFFER) {

@@ -301,7 +301,8 @@ agx_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
                        enum pipe_resource_param param, unsigned usage,
                        uint64_t *value)
 {
-   struct agx_resource *rsrc = (struct agx_resource *)prsc;
+   struct agx_resource *rsrc =
+      (struct agx_resource *)util_resource_at_index(prsc, plane);

    switch (param) {
    case PIPE_RESOURCE_PARAM_STRIDE:

@@ -1292,7 +1293,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,

    if (zres->layout.compressed) {
       c->depth.comp_base =
-         agx_map_texture_gpu(zres, 0) + zres->layout.metadata_offset_B +
+         agx_map_gpu(zres) + zres->layout.metadata_offset_B +
          (first_layer * zres->layout.compression_layer_stride_B) +
          zres->layout.level_offsets_compressed_B[level];

@@ -1329,7 +1330,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,

    if (sres->layout.compressed) {
       c->stencil.comp_base =
-         agx_map_texture_gpu(sres, 0) + sres->layout.metadata_offset_B +
+         agx_map_gpu(sres) + sres->layout.metadata_offset_B +
          (first_layer * sres->layout.compression_layer_stride_B) +
          sres->layout.level_offsets_compressed_B[level];

@@ -503,7 +503,7 @@ agx_get_query_result_resource_gpu(struct agx_context *ctx,
                       : 0;

    libagx_copy_query_gl(batch, agx_1d(1), AGX_BARRIER_ALL, query->ptr.gpu,
-                        rsrc->bo->va->addr + offset, result_type, bool_size);
+                        agx_map_gpu(rsrc) + offset, result_type, bool_size);
    return true;
 }

@@ -726,7 +726,7 @@ agx_pack_texture(void *out, struct agx_resource *rsrc,

    if (rsrc->layout.compressed) {
       cfg.acceleration_buffer =
-         agx_map_texture_gpu(rsrc, 0) + rsrc->layout.metadata_offset_B +
+         agx_map_gpu(rsrc) + rsrc->layout.metadata_offset_B +
          (first_layer * rsrc->layout.compression_layer_stride_B);
    }

@@ -1262,7 +1262,7 @@ agx_batch_upload_pbe(struct agx_batch *batch, struct agx_pbe_packed *out,
       cfg.extended = true;

       cfg.acceleration_buffer =
-         agx_map_texture_gpu(tex, 0) + tex->layout.metadata_offset_B +
+         agx_map_gpu(tex) + tex->layout.metadata_offset_B +
          (layer * tex->layout.compression_layer_stride_B);
    }

@@ -3756,8 +3756,9 @@ agx_index_buffer_rsrc_ptr(struct agx_batch *batch,
    struct agx_resource *rsrc = agx_resource(info->index.resource);
    agx_batch_reads(batch, rsrc);

-   *extent = ALIGN_POT(rsrc->layout.size_B, 4);
-   return rsrc->bo->va->addr;
+   *extent =
+      ALIGN_POT(rsrc->layout.size_B - rsrc->layout.level_offsets_B[0], 4);
+   return agx_map_gpu(rsrc);
 }

 static uint64_t

@@ -3948,7 +3949,7 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
          params.xfb_size[i] = size;

          if (rsrc) {
-            params.xfb_offs_ptrs[i] = rsrc->bo->va->addr;
+            params.xfb_offs_ptrs[i] = agx_map_gpu(rsrc);
             agx_batch_writes(batch, rsrc, 0);
             batch->incoherent_writes = true;
          }

@@ -4054,7 +4055,7 @@ agx_indirect_buffer_ptr(struct agx_batch *batch,

    struct agx_resource *rsrc = agx_resource(indirect->buffer);
    agx_batch_reads(batch, rsrc);
-   return rsrc->bo->va->addr + indirect->offset;
+   return agx_map_gpu(rsrc) + indirect->offset;
 }

 static void

@@ -5388,7 +5389,7 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    if (info->indirect) {
       struct agx_resource *rsrc = agx_resource(info->indirect);
       agx_batch_reads(batch, rsrc);
-      indirect = rsrc->bo->va->addr + info->indirect_offset;
+      indirect = agx_map_gpu(rsrc) + info->indirect_offset;
    }

    /* Increment the pipeline stats query.

@@ -5493,7 +5494,7 @@ agx_set_global_binding(struct pipe_context *pipe, unsigned first,
          struct agx_resource *rsrc = agx_resource(resources[i]);

          memcpy(&addr, handles[i], sizeof(addr));
-         addr += rsrc->bo->va->addr;
+         addr += agx_map_gpu(rsrc);
          memcpy(handles[i], &addr, sizeof(addr));
       } else {
          pipe_resource_reference(res, NULL);

@@ -5534,7 +5535,7 @@ agx_decompress_inplace(struct agx_batch *batch, struct pipe_surface *surf,
                            surf->last_layer - surf->first_layer + 1);

    libagx_decompress(batch, grid, AGX_BARRIER_ALL, layout, surf->first_layer,
-                     level, agx_map_texture_gpu(rsrc, 0), images.gpu);
+                     level, agx_map_gpu(rsrc), images.gpu);
 }

 void

@@ -970,10 +970,16 @@ agx_map_texture_cpu(struct agx_resource *rsrc, unsigned level, unsigned z)
           ail_get_layer_level_B(&rsrc->layout, z, level);
 }

+static inline uint64_t
+agx_map_gpu(struct agx_resource *rsrc)
+{
+   return rsrc->bo->va->addr + rsrc->layout.level_offsets_B[0];
+}
+
 static inline uint64_t
 agx_map_texture_gpu(struct agx_resource *rsrc, unsigned z)
 {
-   return rsrc->bo->va->addr +
+   return agx_map_gpu(rsrc) +
           (uint64_t)ail_get_layer_offset_B(&rsrc->layout, z);
 }

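
The long run of agx hunks replacing rsrc->bo->va->addr with agx_map_gpu() is mechanical once the helper above exists: imported resources may now begin at a nonzero offset inside their BO (whandle->offset, stored into level_offsets_B[0] by agx_resource_from_handle earlier), so every GPU address must fold that offset in, and every size derived from layout.size_B must subtract it. A small worked sketch with made-up numbers:

/* Sketch of the addressing change for an imported linear resource.
 * All values are illustrative, not taken from real hardware. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint64_t bo_va         = 0x100000000ull; /* GPU VA of the backing BO */
   uint64_t import_offset = 4096;           /* whandle->offset from the exporter */
   uint64_t layout_size_B = 1052672;        /* layout size measured from offset 0 */

   /* Old: the base address ignored the import offset. */
   uint64_t old_base = bo_va;

   /* New: agx_map_gpu() folds level_offsets_B[0] (== import_offset) in,
    * and derived sizes shrink by the same amount. */
   uint64_t new_base = bo_va + import_offset;
   uint64_t new_size = layout_size_B - import_offset;

   assert(new_base == old_base + 4096);
   assert(new_size == 1048576);
   return 0;
}
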
@@ -116,7 +116,7 @@ agx_batch_get_so_address(struct agx_batch *batch, unsigned buffer,
                  target->buffer_size);

    *size = target->buffer_size;
-   return rsrc->bo->va->addr + target->buffer_offset;
+   return agx_map_gpu(rsrc) + target->buffer_offset;
 }

 void

@@ -3,12 +3,9 @@
  * SPDX-License-Identifier: MIT
  */
 #include <stdio.h>
-#include "asahi/genxml/agx_pack.h"
 #include "pipe/p_state.h"
-#include "util/format/u_format.h"
 #include "util/half_float.h"
 #include "util/macros.h"
-#include "agx_abi.h"
 #include "agx_device.h"
 #include "agx_state.h"
 #include "pool.h"

@@ -19,8 +16,7 @@ agx_const_buffer_ptr(struct agx_batch *batch, struct pipe_constant_buffer *cb)
    if (cb->buffer) {
       struct agx_resource *rsrc = agx_resource(cb->buffer);
       agx_batch_reads(batch, rsrc);
-
-      return rsrc->bo->va->addr + cb->buffer_offset;
+      return agx_map_gpu(rsrc) + cb->buffer_offset;
    } else {
       return 0;
    }

@@ -42,8 +38,9 @@ agx_upload_vbos(struct agx_batch *batch)
          struct agx_resource *rsrc = agx_resource(vb.buffer.resource);
          agx_batch_reads(batch, rsrc);

-         buffers[vbo] = rsrc->bo->va->addr + vb.buffer_offset;
-         buf_sizes[vbo] = rsrc->layout.size_B - vb.buffer_offset;
+         buffers[vbo] = agx_map_gpu(rsrc) + vb.buffer_offset;
+         buf_sizes[vbo] = rsrc->layout.size_B - vb.buffer_offset -
+                          rsrc->layout.level_offsets_B[0];
       }
    }

@@ -144,7 +141,7 @@ agx_set_ssbo_uniforms(struct agx_batch *batch, mesa_shader_stage stage)
            agx_batch_reads(batch, rsrc);
         }

-        unif->ssbo_base[cb] = rsrc->bo->va->addr + sb->buffer_offset;
+        unif->ssbo_base[cb] = agx_map_gpu(rsrc) + sb->buffer_offset;
         unif->ssbo_size[cb] = st->ssbo[cb].buffer_size;
      } else {
         /* Invalid, so use the sink */

@@ -464,6 +464,15 @@ iris_blorp_exec_blitter(struct blorp_batch *blorp_batch,

    iris_bo_bump_seqno(params->dst.addr.buffer, batch->next_seqno,
                       IRIS_DOMAIN_OTHER_WRITE);
+
+   /*
+    * TODO: Add INTEL_NEEDS_WA_14025112257 check once the HSD is propagated
+    * to all other impacted platforms.
+    */
+   if (batch->screen->devinfo->ver >= 20 && batch->name == IRIS_BATCH_COMPUTE) {
+      iris_emit_pipe_control_flush(batch, "WA_14025112257",
+                                   PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+   }
 }

 static void

@@ -223,9 +223,9 @@ iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,

    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);

-   iris->partitioning = brw->partitioning;
-   iris->output_topology = brw->output_topology;
-   iris->domain = brw->domain;
+   iris->partitioning = brw_tess_info_partitioning(brw->tess_info);
+   iris->output_topology = brw_tess_info_output_topology(brw->tess_info);
+   iris->domain = brw_tess_info_domain(brw->tess_info);
    iris->include_primitive_id = brw->include_primitive_id;
 }

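
The new accessors suggest the separate partitioning, output-topology, and domain fields of the TES prog data were folded into one packed tess_info word. The real bit layout lives in the Intel compiler headers; the following is only a hypothetical illustration of the accessor pattern, with invented enums and field positions:

/* Hypothetical sketch only: invented layout, not the actual brw definition. */
#include <stdint.h>

enum tess_partitioning { PART_INTEGER, PART_ODD_FRACTIONAL, PART_EVEN_FRACTIONAL };
enum tess_topology     { TOPO_POINT, TOPO_LINE, TOPO_TRI_CW, TOPO_TRI_CCW };
enum tess_domain       { DOMAIN_QUAD, DOMAIN_TRI, DOMAIN_ISOLINE };

/* Pack three small enums into one word; unpack with shifts and masks. */
static inline uint32_t tess_info_pack(enum tess_partitioning p,
                                      enum tess_topology t,
                                      enum tess_domain d)
{
   return (uint32_t)p | ((uint32_t)t << 2) | ((uint32_t)d << 4);
}

static inline enum tess_partitioning
tess_info_partitioning(uint32_t info) { return (enum tess_partitioning)(info & 0x3); }

static inline enum tess_topology
tess_info_output_topology(uint32_t info) { return (enum tess_topology)((info >> 2) & 0x3); }

static inline enum tess_domain
tess_info_domain(uint32_t info) { return (enum tess_domain)((info >> 4) & 0x3); }
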
@@ -9901,6 +9901,16 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
    }
 #endif

+#if GFX_VER >= 12
+   /* BSpec 47112 (xe), 56551 (xe2): Instruction_PIPE_CONTROL (ComputeCS):
+    *    SW must follow below programming restrictions when programming
+    *    PIPE_CONTROL command:
+    *    "Command Streamer Stall Enable" must be always set.
+    */
+   if (batch->name == IRIS_BATCH_COMPUTE)
+      flags |= PIPE_CONTROL_CS_STALL;
+#endif
+
    /* The "L3 Read Only Cache Invalidation Bit" docs say it "controls the
     * invalidation of the Geometry streams cached in L3 cache at the top
     * of the pipe". In other words, index & vertex data that gets cached

@@ -338,7 +338,6 @@ attribs_update_simple(struct lp_build_interp_soa_context *bld,
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context *coeff_bld = &bld->coeff_bld;
    struct lp_build_context *setup_bld = &bld->setup_bld;
-   LLVMValueRef oow = NULL;
    LLVMValueRef pixoffx;
    LLVMValueRef pixoffy;
    LLVMValueRef ptr;

@@ -425,25 +424,23 @@ attribs_update_simple(struct lp_build_interp_soa_context *bld,
       }

       if (interp == LP_INTERP_PERSPECTIVE) {
-         if (oow == NULL) {
-            LLVMValueRef w;
-            assert(attrib != 0);
-            assert(bld->mask[0] & TGSI_WRITEMASK_W);
-            if (bld->coverage_samples > 1 &&
-                (loc == TGSI_INTERPOLATE_LOC_SAMPLE ||
-                 loc == TGSI_INTERPOLATE_LOC_CENTROID)) {
-               /*
-                * We can't use the precalculated 1/w since we didn't know
-                * the actual position yet (we were assuming center).
-                */
-               LLVMValueRef indexw = lp_build_const_int32(gallivm, 3);
-               w = interp_attrib_linear(bld, 0, indexw, chan_pixoffx, chan_pixoffy);
-            }
-            else {
-               w = bld->attribs[0][3];
-            }
-            oow = lp_build_rcp(coeff_bld, w);
+         LLVMValueRef w;
+         assert(attrib != 0);
+         assert(bld->mask[0] & TGSI_WRITEMASK_W);
+         if (bld->coverage_samples > 1 &&
+             (loc == TGSI_INTERPOLATE_LOC_SAMPLE ||
+              loc == TGSI_INTERPOLATE_LOC_CENTROID)) {
+            /*
+             * We can't use the precalculated 1/w since we didn't know
+             * the actual position yet (we were assuming center).
+             */
+            LLVMValueRef indexw = lp_build_const_int32(gallivm, 3);
+            w = interp_attrib_linear(bld, 0, indexw, chan_pixoffx, chan_pixoffy);
          }
+         else {
+            w = bld->attribs[0][3];
+         }
+         LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
          a = lp_build_mul(coeff_bld, a, oow);
       }

@@ -1357,7 +1357,7 @@ llvmpipe_free_memory(struct pipe_screen *pscreen,
 #if DETECT_OS_LINUX
    struct llvmpipe_screen *screen = llvmpipe_screen(pscreen);

-   if (mem->fd) {
+   if (mem->fd >= 0) {
       mtx_lock(&screen->mem_mutex);
       util_vma_heap_free(&screen->mem_heap, mem->offset, mem->size);
       mtx_unlock(&screen->mem_mutex);

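
The old test `if (mem->fd)` misclassified file descriptor 0, which is a perfectly valid descriptor, as "no fd". With llvmpipe_import_memory_fd() below now initializing mem_fd and dmabuf_fd to -1, the correct emptiness test is `fd >= 0`. A tiny sketch of the sentinel convention:

/* Sketch: -1 is the conventional "no descriptor" sentinel; 0 is valid. */
#include <assert.h>

struct alloc { int fd; };

static void alloc_init(struct alloc *a)      { a->fd = -1; }
static int  alloc_has_fd(const struct alloc *a) { return a->fd >= 0; }

int main(void)
{
   struct alloc a;
   alloc_init(&a);
   assert(!alloc_has_fd(&a));
   a.fd = 0;                 /* e.g. a descriptor reusing slot 0 */
   assert(alloc_has_fd(&a)); /* the old `if (fd)` test would miss this */
   return 0;
}
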
@@ -1415,8 +1415,7 @@ llvmpipe_resource_alloc_udmabuf(struct llvmpipe_screen *screen,

    struct pipe_memory_allocation *data =
       mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, mem_fd, 0);
-
-   if (!data)
+   if (data == MAP_FAILED)
       goto fail;

    alloc->mem_fd = mem_fd;

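
The udmabuf fix corrects a classic mmap() pitfall: on failure mmap() returns MAP_FAILED, i.e. (void *)-1, never NULL, so `if (!data)` could never catch the error. A small demonstration:

/* Sketch: mmap() reports failure with MAP_FAILED ((void *)-1), not NULL. */
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
   /* Invalid arguments on purpose: length 0 fails with EINVAL. */
   void *p = mmap(NULL, 0, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (p == MAP_FAILED)
      perror("mmap");  /* taken: p == (void *)-1, which is truthy */
   if (!p)
      puts("never reached: mmap does not return NULL on failure");
   return 0;
}
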
@@ -1486,6 +1485,9 @@ llvmpipe_import_memory_fd(struct pipe_screen *screen,
                           bool dmabuf)
 {
    struct llvmpipe_memory_allocation *alloc = CALLOC_STRUCT(llvmpipe_memory_allocation);
+   if (!alloc)
+      return false;
+
    alloc->mem_fd = -1;
    alloc->dmabuf_fd = -1;
 #if defined(HAVE_LIBDRM) && defined(HAVE_LINUX_UDMABUF_H)

@@ -1596,9 +1598,13 @@ llvmpipe_resource_bind_backing(struct pipe_screen *pscreen,
    if (!lpr->backable)
       return false;

-   if ((lpr->base.flags & PIPE_RESOURCE_FLAG_SPARSE) && offset < lpr->size_required) {
+   if (lpr->base.flags & PIPE_RESOURCE_FLAG_SPARSE) {
 #if DETECT_OS_LINUX
       struct llvmpipe_memory_allocation *mem = (struct llvmpipe_memory_allocation *)pmem;

+      if (offset >= lpr->size_required)
+         return false;
+
       if (mem) {
          if (llvmpipe_resource_is_texture(&lpr->base)) {
             mmap((char *)lpr->tex_data + offset, size, PROT_READ|PROT_WRITE,

@@ -1618,9 +1624,11 @@
                  MAP_SHARED|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
          }
       }
-#endif

       return true;
+#else
+      return false;
+#endif
    }

    addr = llvmpipe_map_memory(pscreen, pmem);

@@ -1236,44 +1236,6 @@ spec@nv_texture_env_combine4@nv_texture_env_combine4-combine,Fail

 spec@oes_texture_float@oes_texture_float half,Fail

-# Remaining fallout from 9d359c6d10adb1cd2978a0e13714a3f98544aae8
-spec@arb_texture_compression@fbo-generatemipmap-formats,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA NPOT,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@gen-compressed-teximage,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT NPOT,Fail
-
 # uprev Piglit in Mesa
 spec@!opengl 1.1@teximage-scale-bias,Fail
 spec@glsl-1.10@execution@glsl-fs-texture2d-mipmap-const-bias-01,Fail

@@ -1280,44 +1280,6 @@ spec@nv_texture_env_combine4@nv_texture_env_combine4-combine,Fail

 spec@oes_texture_float@oes_texture_float half,Fail

-# Remaining fallout from 9d359c6d10adb1cd2978a0e13714a3f98544aae8
-spec@arb_texture_compression@fbo-generatemipmap-formats,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA NPOT,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@gen-compressed-teximage,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT NPOT,Fail
-
 # uprev Piglit in Mesa
 spec@!opengl 1.1@teximage-scale-bias,Fail
 spec@glsl-1.10@execution@glsl-fs-texture2d-mipmap-const-bias-01,Fail

@@ -778,78 +778,6 @@ dEQP-GLES2.functional.texture.mipmap.2d.projected.linear_nearest_repeat,Fail
 dEQP-GLES2.functional.texture.mipmap.2d.projected.linear_nearest_mirror,Fail
 dEQP-GLES2.functional.texture.mipmap.2d.projected.linear_nearest_clamp,Fail

-# Remaining fallout from 9d359c6d10adb1cd2978a0e13714a3f98544aae8
-spec@arb_texture_compression@fbo-generatemipmap-formats,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA NPOT,Fail
-spec@ati_texture_compression_3dc@fbo-generatemipmap-formats,Fail
-spec@ati_texture_compression_3dc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI,Fail
-spec@ati_texture_compression_3dc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI NPOT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT NPOT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT NPOT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT NPOT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_LATC1_EXT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_LATC1_EXT NPOT,Fail
-spec@ext_texture_compression_rgtc@compressedteximage gl_compressed_red_green_rgtc2_ext,Fail
-spec@ext_texture_compression_rgtc@compressedteximage gl_compressed_signed_red_green_rgtc2_ext,Fail
-spec@ext_texture_compression_rgtc@compressedteximage gl_compressed_signed_red_rgtc1_ext,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RED_RGTC1,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RED_RGTC1 NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RG_RGTC2,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RG_RGTC2 NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED_RGTC1,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED_RGTC1 NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG_RGTC2,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG_RGTC2 NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG NPOT,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@gen-compressed-teximage,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT NPOT,Fail
-
 # uprev Piglit in Mesa
 spec@!opengl 1.1@teximage-scale-bias,Fail
 spec@ext_framebuffer_multisample@accuracy all_samples color depthstencil linear,Fail

@@ -947,6 +947,32 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
     util_framebuffer_init(pipe, state, r300->fb_cbufs, &r300->fb_zsbuf);
     util_copy_framebuffer_state(r300->fb_state.state, state);

+    /* DXTC blits require that blocks are 2x1 or 4x1 pixels, but
+     * pipe_surface_width sets the framebuffer width as if blocks were 1x1
+     * pixels. Override the width to correct that.
+     */
+    if (state->nr_cbufs == 1 && state->cbufs[0].texture &&
+        state->cbufs[0].format == PIPE_FORMAT_R8G8B8A8_UNORM &&
+        util_format_is_compressed(state->cbufs[0].texture->format)) {
+        struct pipe_framebuffer_state *fb =
+            (struct pipe_framebuffer_state*)r300->fb_state.state;
+        const struct util_format_description *desc =
+            util_format_description(state->cbufs[0].texture->format);
+        unsigned width = u_minify(state->cbufs[0].texture->width0,
+                                  state->cbufs[0].level);
+
+        assert(desc->block.width == 4 && desc->block.height == 4);
+
+        /* Each 64-bit DXT block is 2x1 pixels, and each 128-bit DXT
+         * block is 4x1 pixels when blitting.
+         */
+        width = align(width, 4); /* align to the DXT block width. */
+        if (desc->block.bits == 64)
+            width = DIV_ROUND_UP(width, 2);
+
+        fb->width = width;
+    }
+
     /* Remove trailing NULL colorbuffers. */
     while (current_state->nr_cbufs && !current_state->cbufs[current_state->nr_cbufs-1].texture)
         current_state->nr_cbufs--;

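
To sanity-check the width override above with concrete numbers: a 100-pixel-wide mip of a 64-bit-per-block format (DXT1) is first padded to the 4-pixel block width and then halved, since the blit treats each 64-bit block as 2x1 pixels; 128-bit blocks (DXT3/5) are 4x1 and keep the aligned width. A compilable sketch of that arithmetic (macros re-derived here, not the Mesa ones):

/* Worked example of the DXTC framebuffer-width override; illustrative only. */
#include <assert.h>

#define ALIGN(x, a)        (((x) + (a) - 1) / (a) * (a))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   unsigned width = 100;            /* mip width in pixels */

   width = ALIGN(width, 4);         /* pad to the 4-pixel DXT block width */
   assert(width == 100);

   /* DXT1 (64-bit blocks): each block is 2x1 "pixels" to the blit,
    * so the effective framebuffer width halves. */
   unsigned dxt1_width = DIV_ROUND_UP(width, 2);
   assert(dxt1_width == 50);
   return 0;
}
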
@@ -201,6 +201,7 @@ void r600_draw_rectangle(struct blitter_context *blitter,
    rctx->b.set_vertex_buffers(&rctx->b, 1, &vbuffer);
    util_draw_arrays_instanced(&rctx->b, R600_PRIM_RECTANGLE_LIST, 0, 3,
                               0, num_instances);
+   pipe_resource_reference(&buf, NULL);
 }

 static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)

@@ -14,6 +14,7 @@
 #include "util/u_memory.h"
 #include "util/u_pack_color.h"
 #include "util/u_surface.h"
+#include "util/u_resource.h"
 #include "util/os_time.h"
 #include "frontend/winsys_handle.h"
 #include <errno.h>

@@ -442,7 +443,7 @@ static bool r600_texture_get_param(struct pipe_screen *screen,

    switch (param) {
    case PIPE_RESOURCE_PARAM_NPLANES:
-      *value = 1;
+      *value = util_resource_num(resource);
       return true;

    case PIPE_RESOURCE_PARAM_STRIDE:

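
util_resource_num() counts the planes of a resource; gallium chains the planes of multi-planar formats (e.g. NV12) through the resource's next pointer, so the hard-coded 1 undercounted them. A simplified sketch of the counting idea (toy struct, not the real pipe_resource):

/* Sketch: planar gallium resources chain planes via ->next; counting the
 * chain replaces the hard-coded nplanes = 1. */
#include <assert.h>
#include <stddef.h>

struct resource { struct resource *next; };

static unsigned resource_num_planes(struct resource *res)
{
   unsigned n = 0;
   for (; res; res = res->next)
      n++;
   return n;
}

int main(void)
{
   /* An NV12-style image: a Y plane chained to a UV plane. */
   struct resource uv = { NULL };
   struct resource y  = { &uv };
   assert(resource_num_planes(&y) == 2);
   return 0;
}
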
@@ -20,6 +20,16 @@ AluGroup::AluGroup()
    m_free_slots = has_t() ? 0x1f : 0xf;
 }

+void
+AluGroup::apply_add_instr(AluInstr *instr)
+{
+   instr->set_parent_group(this);
+   instr->pin_dest_to_chan();
+   m_has_kill_op |= instr->is_kill();
+   m_has_pred_update |= instr->has_alu_flag(alu_update_exec);
+   assert(!(m_has_kill_op && m_has_pred_update));
+}
+
 bool
 AluGroup::add_instruction(AluInstr *instr)
 {

@@ -32,17 +42,13 @@ AluGroup::add_instruction(AluInstr *instr)
       ASSERTED auto opinfo = alu_ops.find(instr->opcode());
       assert(opinfo->second.can_channel(AluOp::t, s_chip_class));
       if (add_trans_instructions(instr)) {
-         instr->set_parent_group(this);
-         instr->pin_dest_to_chan();
-         m_has_kill_op |= instr->is_kill();
+         apply_add_instr(instr);
         return true;
      }
   }

   if (add_vec_instructions(instr) && !instr->has_alu_flag(alu_is_trans)) {
-      instr->set_parent_group(this);
-      instr->pin_dest_to_chan();
-      m_has_kill_op |= instr->is_kill();
+      apply_add_instr(instr);
      return true;
   }

@@ -51,9 +57,7 @@ AluGroup::add_instruction(AluInstr *instr)

    if (s_max_slots > 4 && opinfo->second.can_channel(AluOp::t, s_chip_class) &&
        add_trans_instructions(instr)) {
-      instr->set_parent_group(this);
-      instr->pin_dest_to_chan();
-      m_has_kill_op |= instr->is_kill();
+      apply_add_instr(instr);
       return true;
    }

@@ -128,6 +132,8 @@ AluGroup::add_trans_instructions(AluInstr *instr)
        * make sure the corresponding vector channel is used */
       assert(instr->has_alu_flag(alu_is_trans) || m_slots[instr->dest_chan()]);
       m_has_kill_op |= instr->is_kill();
+      m_has_pred_update |= instr->has_alu_flag(alu_update_exec);
+
       m_slot_assignemnt_order[m_next_slot_assignemnt++] = 4;
       return true;
    }

@@ -170,17 +176,12 @@ AluGroup::add_vec_instructions(AluInstr *instr)
    if (!m_slots[preferred_chan]) {
       if (instr->bank_swizzle() != alu_vec_unknown) {
          if (try_readport(instr, instr->bank_swizzle())) {
-            m_has_kill_op |= instr->is_kill();
-            m_slot_assignemnt_order[m_next_slot_assignemnt++] = preferred_chan;
            return true;
         }
      } else {
         for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
-            if (try_readport(instr, i)) {
-               m_has_kill_op |= instr->is_kill();
-               m_slot_assignemnt_order[m_next_slot_assignemnt++] = preferred_chan;
+            if (try_readport(instr, i))
               return true;
-            }
         }
      }
   } else {

@@ -209,18 +210,12 @@ AluGroup::add_vec_instructions(AluInstr *instr)
       sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
       dest->set_chan(free_chan);
       if (instr->bank_swizzle() != alu_vec_unknown) {
-         if (try_readport(instr, instr->bank_swizzle())) {
-            m_has_kill_op |= instr->is_kill();
-            m_slot_assignemnt_order[m_next_slot_assignemnt++] = free_chan;
+         if (try_readport(instr, instr->bank_swizzle()))
            return true;
-         }
      } else {
         for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
-            if (try_readport(instr, i)) {
-               m_has_kill_op |= instr->is_kill();
-               m_slot_assignemnt_order[m_next_slot_assignemnt++] = free_chan;
+            if (try_readport(instr, i))
               return true;
-            }
         }
      }
   }

@@ -318,6 +313,9 @@ AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
       else if (dest->pin() == pin_group)
          dest->set_pin(pin_chgr);
    }
+   m_has_kill_op |= instr->is_kill();
+   m_has_pred_update |= instr->has_alu_flag(alu_update_exec);
+   m_slot_assignemnt_order[m_next_slot_assignemnt++] = preferred_chan;
    return true;
 }
 return false;

Some files were not shown because too many files have changed in this diff.