/*
 * Copyright © 2023 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
|
|
|
|
|
#include "helpers.h"
|
|
|
|
|
#include "test_d3d11_derivs-spirv.h"
|
|
|
|
|
|
|
|
|
|
using namespace aco;
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.simple)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x
|
|
|
|
|
//>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
2024-01-29 17:54:34 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.constant)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in float in_coord;
|
|
|
|
|
layout(location = 0) out float out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in float in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, vec2(in_coord, -0.5));
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, -0.5
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
|
|
|
|
//>> v_mov_b32_e32 v#ry, -0.5 ; $_
|
aco/sched_ilp: new latency heuristic
The main train of thought is that we should consider latency after
the write was scheduled. This means we rely a lot less on the input
order of instructions for good results.
Foz-DB GFX1150:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01%
CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00%
Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06%
InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02%
VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07%
SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05%
VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00%
Foz-DB Navi31:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00%
CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00%
Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05%
InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01%
VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06%
SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05%
VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00%
Foz-DB Navi21:
Totals from 76224 (96.03% of 79377) affected shaders:
Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03%
CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02%
Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05%
InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01%
VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04%
SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06%
Foz-DB Vega10:
Totals from 60142 (95.42% of 63026) affected shaders:
Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02%
CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02%
Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43%
InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09%
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09%
SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08%
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.discard)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
if (gl_FragCoord.y > 1.0)
|
|
|
|
|
discard;
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
radv: emit discard as demote by default
Also removes radv_lower_discard_to_demote debug option.
Totals from 1506 (1.90% of 79439) affected shaders: (GFX11)
MaxWaves: 46432 -> 46448 (+0.03%)
Instrs: 664515 -> 667914 (+0.51%); split: -0.15%, +0.67%
CodeSize: 3569656 -> 3583440 (+0.39%); split: -0.12%, +0.51%
VGPRs: 50100 -> 49680 (-0.84%); split: -0.96%, +0.12%
Latency: 4221359 -> 4217875 (-0.08%); split: -0.67%, +0.59%
InvThroughput: 628809 -> 625565 (-0.52%); split: -0.53%, +0.02%
VClause: 9948 -> 9965 (+0.17%); split: -0.36%, +0.53%
SClause: 19656 -> 19695 (+0.20%); split: -0.77%, +0.97%
Copies: 32113 -> 33513 (+4.36%); split: -1.59%, +5.95%
Branches: 8406 -> 8378 (-0.33%)
PreSGPRs: 42328 -> 42555 (+0.54%); split: -0.39%, +0.93%
PreVGPRs: 38451 -> 38203 (-0.64%); split: -0.78%, +0.14%
VALU: 390770 -> 390208 (-0.14%); split: -0.16%, +0.02%
SALU: 43318 -> 46374 (+7.05%); split: -0.08%, +7.14%
VMEM: 15052 -> 15051 (-0.01%)
SMEM: 37225 -> 37215 (-0.03%); split: -0.03%, +0.01%
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27617>
2024-02-06 15:01:23 +01:00
|
|
|
/* The discard gets emitted as demote_if. */
|
2024-12-20 18:16:33 +00:00
|
|
|
//>> s2: %_:exec, s1: (kill)%_:scc = s_wqm_b64 %_
|
|
|
|
|
//! p_exit_early_if_not %_:exec
|
aco: disable wqm for tex loads when not needed
By only executing VMEM loads for lanes where the result is used, we can save
bandwidth.
The NIR pass only handles tex for now, but those are most common anyway.
We can extend it handle image/ssbo/ubo/global loads in the future.
Foz-DB GFX1201:
Totals from 32633 (40.66% of 80251) affected shaders:
Instrs: 22635910 -> 23193509 (+2.46%); split: -0.00%, +2.46%
CodeSize: 122880044 -> 125093428 (+1.80%); split: -0.00%, +1.81%
VGPRs: 1481868 -> 1481712 (-0.01%)
SpillSGPRs: 3877 -> 4301 (+10.94%); split: -0.52%, +11.45%
Latency: 171480552 -> 171685219 (+0.12%); split: -0.18%, +0.30%
InvThroughput: 24364743 -> 24373441 (+0.04%); split: -0.08%, +0.12%
VClause: 388318 -> 388557 (+0.06%); split: -0.06%, +0.13%
SClause: 774781 -> 776492 (+0.22%); split: -0.29%, +0.51%
Copies: 1416586 -> 1541199 (+8.80%); split: -0.16%, +8.96%
Branches: 419591 -> 419673 (+0.02%); split: -0.02%, +0.04%
PreSGPRs: 1330303 -> 1416540 (+6.48%)
PreVGPRs: 964864 -> 964863 (-0.00%)
VALU: 12919601 -> 12920254 (+0.01%); split: -0.01%, +0.01%
SALU: 2685402 -> 3224147 (+20.06%); split: -0.00%, +20.07%
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35970>
2025-07-06 23:12:58 +02:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (kill)%_, (kill)%_, %_, (kill)%_ 2d disable_wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.bias)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord, gl_FragCoord.x);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
|
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
|
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> image_sample_b v[#_:#_], [v#rb, v#rx, v#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ $_
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.offset)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = textureOffset(tex, in_coord, ivec2(1, 2));
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
/* Use GFX9 because we should have at least one test which doesn't use NSA. */
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX9));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
|
|
|
|
//>> v1: %offset = p_parallelcopy 0x201
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample_o (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%offset 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
|
|
|
|
//>> BB1:
|
|
|
|
|
//>> v_mov_b32_e32 v#ro_tmp, 0x201 ; $_ $_
|
|
|
|
|
//>> v_mov_b32_e32 v#ro, v#r0_tmp ; $_
|
|
|
|
|
//; success = ro+1 == rx and ro+2 == ry
|
|
|
|
|
//>> image_sample_o v[#_:#_], v[#ro:#rx], s[#_:#_], s[#_:#_] dmask:0xf ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.array)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec3 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2DArray tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %layer = v_rndne_f32 (kill)%_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr (kill)%_, (kill)%_, (kill)%layer
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
|
|
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
|
|
|
|
//; success = rx+1 == ry and rx+2 == rl
|
|
|
|
|
//>> image_sample v[#_:#_], v[#rx:#rl], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.bias_array)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec3 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2DArray tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord, gl_FragCoord.x);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
|
|
|
|
|
//>> v1: %layer = v_rndne_f32 (kill)%_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv4: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_, (kill)%layer
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, %wqm, (kill)%bias 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
|
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
|
|
|
|
//>> image_sample_b v[#_:#_], [v2, v#rx, v#ry, v#rl], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; $_ $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
2023-05-26 19:14:31 +01:00
|
|
|
BEGIN_TEST(d3d11_derivs._1d_gfx9)
|
|
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in float in_coord;
|
|
|
|
|
layout(location = 0) out float out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in float in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler1D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX9));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, 0.5
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
|
|
|
|
//>> v_mov_b32_e32 v#ry, 0.5 ; $_
|
aco/sched_ilp: new latency heuristic
The main train of thought is that we should consider latency after
the write was scheduled. This means we rely a lot less on the input
order of instructions for good results.
Foz-DB GFX1150:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01%
CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00%
Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06%
InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02%
VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07%
SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05%
VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00%
Foz-DB Navi31:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00%
CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00%
Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05%
InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01%
VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06%
SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05%
VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00%
Foz-DB Navi21:
Totals from 76224 (96.03% of 79377) affected shaders:
Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03%
CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02%
Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05%
InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01%
VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04%
SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06%
Foz-DB Vega10:
Totals from 60142 (95.42% of 63026) affected shaders:
Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02%
CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02%
Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43%
InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09%
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09%
SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08%
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//; success = rx+1 == ry
|
|
|
|
|
//>> image_sample v[#_:#_], v#rx, s[#_:#_], s[#_:#_] dmask:0xf ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
2023-05-26 19:14:31 +01:00
|
|
|
BEGIN_TEST(d3d11_derivs._1d_array_gfx9)
|
|
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler1DArray tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX9));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %layer = v_rndne_f32 (kill)%_
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, 0.5, (kill)%layer
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
aco/sched_ilp: new latency heuristic
The main train of thought is that we should consider latency after
the write was scheduled. This means we rely a lot less on the input
order of instructions for good results.
Foz-DB GFX1150:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01%
CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00%
Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06%
InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02%
VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07%
SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05%
VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00%
Foz-DB Navi31:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00%
CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00%
Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05%
InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01%
VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06%
SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05%
VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00%
Foz-DB Navi21:
Totals from 76224 (96.03% of 79377) affected shaders:
Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03%
CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02%
Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05%
InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01%
VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04%
SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06%
Foz-DB Vega10:
Totals from 60142 (95.42% of 63026) affected shaders:
Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02%
CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02%
Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43%
InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09%
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09%
SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08%
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
|
|
|
//>> v_mov_b32_e32 v#ry, 0.5 ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.y ; $_
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
2025-01-24 08:42:00 +01:00
|
|
|
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
|
2023-10-12 10:52:45 +02:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
|
|
|
|
//; success = rx+1 == ry and rx+2 == rl
|
|
|
|
|
//>> image_sample v[#_:#_], v#rx, s[#_:#_], s[#_:#_] dmask:0xf da ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.cube)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec3 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform samplerCube tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
|
|
|
|
|
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
|
|
|
|
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
2024-01-29 17:54:34 +00:00
|
|
|
//>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
|
|
|
|
|
//>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
|
|
|
|
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
aco/ra: re-use registers from killed operands
Totals from 77283 (97.34% of 79395) affected shaders: (GFX11)
MaxWaves: 2348498 -> 2348250 (-0.01%); split: +0.01%, -0.02%
Instrs: 45304558 -> 45097367 (-0.46%); split: -0.57%, +0.11%
CodeSize: 235719656 -> 234957768 (-0.32%); split: -0.43%, +0.11%
VGPRs: 3065984 -> 3073244 (+0.24%); split: -0.41%, +0.65%
Latency: 308010576 -> 307008565 (-0.33%); split: -0.85%, +0.52%
InvThroughput: 49560307 -> 49464214 (-0.19%); split: -0.54%, +0.34%
VClause: 881895 -> 879739 (-0.24%); split: -0.78%, +0.53%
SClause: 1388139 -> 1374634 (-0.97%); split: -1.12%, +0.14%
Copies: 2918583 -> 2910434 (-0.28%); split: -1.92%, +1.64%
Branches: 893947 -> 893712 (-0.03%); split: -0.06%, +0.03%
VALU: 25260728 -> 25256766 (-0.02%); split: -0.20%, +0.19%
SALU: 4377750 -> 4373595 (-0.09%); split: -0.17%, +0.07%
VOPD: 8603 -> 9163 (+6.51%); split: +8.54%, -2.03%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29235>
2024-04-19 11:55:28 +02:00
|
|
|
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//; success = rx+1 == ry and rx+2 == rf
|
|
|
|
|
//>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.cube_array)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec4 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec4 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform samplerCubeArray tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
|
|
|
|
|
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
|
|
|
|
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
radv: move alu
The stats are decent now that aco has an ILP scheduler
Foz-DB Navi31:
Totals from 73549 (92.59% of 79439) affected shaders:
MaxWaves: 2226952 -> 2229352 (+0.11%); split: +0.21%, -0.10%
Instrs: 44690384 -> 44905884 (+0.48%); split: -0.10%, +0.58%
CodeSize: 232666088 -> 233474808 (+0.35%); split: -0.10%, +0.45%
VGPRs: 2998036 -> 2986936 (-0.37%); split: -0.58%, +0.21%
SpillSGPRs: 7176 -> 7170 (-0.08%); split: -0.53%, +0.45%
SpillVGPRs: 1124 -> 1068 (-4.98%); split: -5.07%, +0.09%
Scratch: 6981632 -> 6977792 (-0.06%)
Latency: 297998345 -> 298541597 (+0.18%); split: -0.35%, +0.53%
InvThroughput: 49162321 -> 49039572 (-0.25%); split: -0.46%, +0.21%
VClause: 881737 -> 884147 (+0.27%); split: -0.35%, +0.62%
SClause: 1371928 -> 1373973 (+0.15%); split: -0.78%, +0.92%
Copies: 2920492 -> 2927281 (+0.23%); split: -0.84%, +1.08%
Branches: 890209 -> 890121 (-0.01%); split: -0.03%, +0.02%
PreSGPRs: 2376670 -> 2377251 (+0.02%); split: -0.25%, +0.28%
PreVGPRs: 2229634 -> 2208966 (-0.93%); split: -1.04%, +0.11%
VALU: 25124040 -> 25127521 (+0.01%); split: -0.07%, +0.08%
SALU: 4343167 -> 4361062 (+0.41%); split: -0.23%, +0.65%
VMEM: 1582363 -> 1582245 (-0.01%); split: -0.01%, +0.00%
VOPD: 8709 -> 8708 (-0.01%); split: +2.35%, -2.37%
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27032>
2024-01-12 11:49:30 +01:00
|
|
|
//>> v1: %layer = v_rndne_f32 (kill)%_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v1: %face_layer = v_fmamk_f32 (kill)%layer, (kill)%face, 0x41000000
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face_layer
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm cube da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
|
|
|
|
//>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_
|
radv: move alu
The stats are decent now that aco has an ILP scheduler
Foz-DB Navi31:
Totals from 73549 (92.59% of 79439) affected shaders:
MaxWaves: 2226952 -> 2229352 (+0.11%); split: +0.21%, -0.10%
Instrs: 44690384 -> 44905884 (+0.48%); split: -0.10%, +0.58%
CodeSize: 232666088 -> 233474808 (+0.35%); split: -0.10%, +0.45%
VGPRs: 2998036 -> 2986936 (-0.37%); split: -0.58%, +0.21%
SpillSGPRs: 7176 -> 7170 (-0.08%); split: -0.53%, +0.45%
SpillVGPRs: 1124 -> 1068 (-4.98%); split: -5.07%, +0.09%
Scratch: 6981632 -> 6977792 (-0.06%)
Latency: 297998345 -> 298541597 (+0.18%); split: -0.35%, +0.53%
InvThroughput: 49162321 -> 49039572 (-0.25%); split: -0.46%, +0.21%
VClause: 881737 -> 884147 (+0.27%); split: -0.35%, +0.62%
SClause: 1371928 -> 1373973 (+0.15%); split: -0.78%, +0.92%
Copies: 2920492 -> 2927281 (+0.23%); split: -0.84%, +1.08%
Branches: 890209 -> 890121 (-0.01%); split: -0.03%, +0.02%
PreSGPRs: 2376670 -> 2377251 (+0.02%); split: -0.25%, +0.28%
PreVGPRs: 2229634 -> 2208966 (-0.93%); split: -1.04%, +0.11%
VALU: 25124040 -> 25127521 (+0.01%); split: -0.07%, +0.08%
SALU: 4343167 -> 4361062 (+0.41%); split: -0.23%, +0.65%
VMEM: 1582363 -> 1582245 (-0.01%); split: -0.01%, +0.00%
VOPD: 8709 -> 8708 (-0.01%); split: +2.35%, -2.37%
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27032>
2024-01-12 11:49:30 +01:00
|
|
|
|
aco/sched_ilp: new latency heuristic
The main train of thought is that we should consider latency after
the write was scheduled. This means we rely a lot less on the input
order of instructions for good results.
Foz-DB GFX1150:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01%
CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00%
Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06%
InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02%
VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07%
SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05%
VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00%
Foz-DB Navi31:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00%
CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00%
Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05%
InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01%
VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06%
SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05%
VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00%
Foz-DB Navi21:
Totals from 76224 (96.03% of 79377) affected shaders:
Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03%
CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02%
Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05%
InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01%
VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04%
SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06%
Foz-DB Vega10:
Totals from 60142 (95.42% of 63026) affected shaders:
Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02%
CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02%
Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43%
InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09%
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09%
SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08%
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
|
|
|
//>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_
|
aco/sched_ilp: base latency and issue cycles on aco_statistics
This matters for trans and scalar fpu instructions.
Foz-DB GFX1150:
Totals from 53894 (67.90% of 79377) affected shaders:
Instrs: 38528421 -> 38481337 (-0.12%); split: -0.16%, +0.04%
CodeSize: 200206016 -> 200023916 (-0.09%); split: -0.12%, +0.03%
Latency: 265011734 -> 264303762 (-0.27%); split: -0.28%, +0.02%
InvThroughput: 53804490 -> 53696097 (-0.20%); split: -0.21%, +0.01%
VClause: 736996 -> 736988 (-0.00%); split: -0.00%, +0.00%
SClause: 1118494 -> 1118474 (-0.00%); split: -0.01%, +0.01%
VALU: 21982349 -> 21982358 (+0.00%); split: -0.00%, +0.00%
Foz-DB Navi31:
Totals from 50791 (63.99% of 79377) affected shaders:
Instrs: 37511862 -> 37495712 (-0.04%); split: -0.11%, +0.07%
CodeSize: 197990892 -> 197925104 (-0.03%); split: -0.09%, +0.06%
Latency: 261929261 -> 261273534 (-0.25%); split: -0.27%, +0.01%
InvThroughput: 43978329 -> 43921618 (-0.13%); split: -0.14%, +0.01%
VClause: 727683 -> 727695 (+0.00%); split: -0.00%, +0.00%
SClause: 1092527 -> 1092544 (+0.00%); split: -0.01%, +0.01%
VALU: 22646553 -> 22646566 (+0.00%)
Foz-DB Navi21:
Totals from 43899 (55.30% of 79377) affected shaders:
Instrs: 35649081 -> 35649110 (+0.00%); split: -0.00%, +0.00%
CodeSize: 192336212 -> 192337276 (+0.00%); split: -0.00%, +0.00%
Latency: 270621538 -> 270221431 (-0.15%); split: -0.16%, +0.02%
InvThroughput: 66757841 -> 66715918 (-0.06%); split: -0.07%, +0.01%
VClause: 734884 -> 734867 (-0.00%); split: -0.01%, +0.01%
SClause: 1072956 -> 1072951 (-0.00%); split: -0.01%, +0.01%
Foz-DB Vega10:
Totals from 52687 (83.60% of 63026) affected shaders:
Instrs: 24595280 -> 24595693 (+0.00%); split: -0.01%, +0.01%
CodeSize: 127199836 -> 127200164 (+0.00%); split: -0.01%, +0.01%
Latency: 252281578 -> 252497934 (+0.09%); split: -0.03%, +0.12%
InvThroughput: 136551527 -> 136577609 (+0.02%); split: -0.01%, +0.03%
VClause: 536798 -> 536718 (-0.01%); split: -0.04%, +0.03%
SClause: 819978 -> 819693 (-0.03%); split: -0.04%, +0.01%
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 16:31:23 +01:00
|
|
|
//>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_
|
2024-01-29 17:54:34 +00:00
|
|
|
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
|
|
|
|
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
aco/sched_ilp: new latency heuristic
The main train of thought is that we should consider latency after
the write was scheduled. This means we rely a lot less on the input
order of instructions for good results.
Foz-DB GFX1150:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 43274326 -> 42129011 (-2.65%); split: -2.65%, +0.01%
CodeSize: 223049932 -> 218465796 (-2.06%); split: -2.06%, +0.00%
Latency: 297614199 -> 292317054 (-1.78%); split: -1.84%, +0.06%
InvThroughput: 57020160 -> 56336213 (-1.20%); split: -1.21%, +0.02%
VClause: 841775 -> 841861 (+0.01%); split: -0.06%, +0.07%
SClause: 1253516 -> 1253798 (+0.02%); split: -0.03%, +0.05%
VALU: 23893837 -> 23893828 (-0.00%); split: -0.00%, +0.00%
Foz-DB Navi31:
Totals from 75606 (95.25% of 79377) affected shaders:
Instrs: 42717592 -> 41531696 (-2.78%); split: -2.78%, +0.00%
CodeSize: 223582476 -> 218866196 (-2.11%); split: -2.11%, +0.00%
Latency: 297736383 -> 292450493 (-1.78%); split: -1.83%, +0.05%
InvThroughput: 47298730 -> 46934084 (-0.77%); split: -0.78%, +0.01%
VClause: 844982 -> 844892 (-0.01%); split: -0.07%, +0.06%
SClause: 1248433 -> 1248693 (+0.02%); split: -0.03%, +0.05%
VALU: 24819703 -> 24819704 (+0.00%); split: -0.00%, +0.00%
Foz-DB Navi21:
Totals from 76224 (96.03% of 79377) affected shaders:
Instrs: 46019515 -> 46015691 (-0.01%); split: -0.03%, +0.03%
CodeSize: 246992544 -> 246977404 (-0.01%); split: -0.03%, +0.02%
Latency: 324647457 -> 318661132 (-1.84%); split: -1.90%, +0.05%
InvThroughput: 74834800 -> 74269723 (-0.76%); split: -0.76%, +0.01%
VClause: 927601 -> 927579 (-0.00%); split: -0.04%, +0.04%
SClause: 1302666 -> 1303178 (+0.04%); split: -0.02%, +0.06%
Foz-DB Vega10:
Totals from 60142 (95.42% of 63026) affected shaders:
Instrs: 25117688 -> 25098175 (-0.08%); split: -0.10%, +0.02%
CodeSize: 129847464 -> 129769456 (-0.06%); split: -0.08%, +0.02%
Latency: 261606546 -> 262407481 (+0.31%); split: -0.12%, +0.43%
InvThroughput: 138422594 -> 138500401 (+0.06%); split: -0.03%, +0.09%
VClause: 555424 -> 555321 (-0.02%); split: -0.11%, +0.09%
SClause: 851219 -> 851620 (+0.05%); split: -0.03%, +0.08%
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33222>
2025-01-26 13:41:53 +01:00
|
|
|
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
|
radv: move alu
The stats are decent now that aco has an ILP scheduler
Foz-DB Navi31:
Totals from 73549 (92.59% of 79439) affected shaders:
MaxWaves: 2226952 -> 2229352 (+0.11%); split: +0.21%, -0.10%
Instrs: 44690384 -> 44905884 (+0.48%); split: -0.10%, +0.58%
CodeSize: 232666088 -> 233474808 (+0.35%); split: -0.10%, +0.45%
VGPRs: 2998036 -> 2986936 (-0.37%); split: -0.58%, +0.21%
SpillSGPRs: 7176 -> 7170 (-0.08%); split: -0.53%, +0.45%
SpillVGPRs: 1124 -> 1068 (-4.98%); split: -5.07%, +0.09%
Scratch: 6981632 -> 6977792 (-0.06%)
Latency: 297998345 -> 298541597 (+0.18%); split: -0.35%, +0.53%
InvThroughput: 49162321 -> 49039572 (-0.25%); split: -0.46%, +0.21%
VClause: 881737 -> 884147 (+0.27%); split: -0.35%, +0.62%
SClause: 1371928 -> 1373973 (+0.15%); split: -0.78%, +0.92%
Copies: 2920492 -> 2927281 (+0.23%); split: -0.84%, +1.08%
Branches: 890209 -> 890121 (-0.01%); split: -0.03%, +0.02%
PreSGPRs: 2376670 -> 2377251 (+0.02%); split: -0.25%, +0.28%
PreVGPRs: 2229634 -> 2208966 (-0.93%); split: -1.04%, +0.11%
VALU: 25124040 -> 25127521 (+0.01%); split: -0.07%, +0.08%
SALU: 4343167 -> 4361062 (+0.41%); split: -0.23%, +0.65%
VMEM: 1582363 -> 1582245 (-0.01%); split: -0.01%, +0.00%
VOPD: 8709 -> 8708 (-0.01%); split: +2.35%, -2.37%
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27032>
2024-01-12 11:49:30 +01:00
|
|
|
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
|
|
|
|
//; success = rx+1 == ry and rx+2 == rlf
|
|
|
|
|
//>> image_sample v[#_:#_], v[#rx:#rlf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
2024-07-25 10:14:20 -04:00
|
|
|
BEGIN_TEST(d3d11_derivs.dfdxy)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = vec4(dFdxFine(in_coord.x), dFdyCoarse(in_coord.y), textureLod(tex, vec2(0.5), 0.0).xy);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
/* Must be before BB1 */
|
aco: reorder dpp for ddx/ddy
Having the mov last allows us to fuse it with the use instruction.
Foz-DB Navi31:
Totals from 9400 (11.84% of 79395) affected shaders:
MaxWaves: 273998 -> 274030 (+0.01%)
Instrs: 8303778 -> 8282997 (-0.25%); split: -0.29%, +0.04%
CodeSize: 44428088 -> 44464860 (+0.08%); split: -0.09%, +0.18%
VGPRs: 506616 -> 504492 (-0.42%)
SpillSGPRs: 1389 -> 1393 (+0.29%)
Latency: 76923466 -> 76983332 (+0.08%); split: -0.06%, +0.14%
InvThroughput: 12386888 -> 12391262 (+0.04%); split: -0.04%, +0.07%
VClause: 125136 -> 125059 (-0.06%); split: -0.13%, +0.07%
SClause: 227361 -> 226615 (-0.33%); split: -0.43%, +0.10%
Copies: 440787 -> 440749 (-0.01%); split: -0.17%, +0.16%
PreVGPRs: 339783 -> 333343 (-1.90%); split: -1.92%, +0.02%
VALU: 5088362 -> 5069737 (-0.37%); split: -0.37%, +0.01%
SALU: 606596 -> 606609 (+0.00%); split: -0.01%, +0.01%
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30350>
2024-07-26 20:10:43 +02:00
|
|
|
//>> v1: %_ = v_subrev_f32 (kill)%_, (kill)%_ quad_perm:[0,0,2,2] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %_ = v_subrev_f32 (kill)%_, (kill)%_ quad_perm:[0,0,0,0] bound_ctrl:1 fi
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
/* Ensure the BC optimize transform is done after ac_nir_lower_tex. */
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.bc_optimize)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, vec2(in_coord.x, interpolateAtCentroid(in_coord.y)));
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %y_coord2 = v_cndmask_b32 (kill)%_, %_, (kill)%_
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 (kill)%_, %_:m0, (kill)%_ attr0.x
|
|
|
|
|
//>> v1: %y = v_interp_p2_f32 (kill)%y_coord2, (kill)%_:m0, (kill)%_ attr0.y
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, %wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.get_lod)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec2(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = textureQueryLod(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x
|
|
|
|
|
//>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr %x, %y
|
aco: reorder dpp for ddx/ddy
Having the mov last allows us to fuse it with the use instruction.
Foz-DB Navi31:
Totals from 9400 (11.84% of 79395) affected shaders:
MaxWaves: 273998 -> 274030 (+0.01%)
Instrs: 8303778 -> 8282997 (-0.25%); split: -0.29%, +0.04%
CodeSize: 44428088 -> 44464860 (+0.08%); split: -0.09%, +0.18%
VGPRs: 506616 -> 504492 (-0.42%)
SpillSGPRs: 1389 -> 1393 (+0.29%)
Latency: 76923466 -> 76983332 (+0.08%); split: -0.06%, +0.14%
InvThroughput: 12386888 -> 12391262 (+0.04%); split: -0.04%, +0.07%
VClause: 125136 -> 125059 (-0.06%); split: -0.13%, +0.07%
SClause: 227361 -> 226615 (-0.33%); split: -0.43%, +0.10%
Copies: 440787 -> 440749 (-0.01%); split: -0.17%, +0.16%
PreVGPRs: 339783 -> 333343 (-1.90%); split: -1.92%, +0.02%
VALU: 5088362 -> 5069737 (-0.37%); split: -0.37%, +0.01%
SALU: 606596 -> 606609 (+0.00%); split: -0.01%, +0.01%
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30350>
2024-07-26 20:10:43 +02:00
|
|
|
//>> v1: %x12_m_x0 = v_subrev_f32 (kill)%x, (kill)%x quad_perm:[0,0,0,0] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %x1_m_x0 = v_mov_b32 %x12_m_x0 quad_perm:[1,1,1,1] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %x2_m_x0 = v_mov_b32 (kill)%x12_m_x0 quad_perm:[2,2,2,2] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %y12_m_y0 = v_subrev_f32 (kill)%y, (kill)%y quad_perm:[0,0,0,0] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %y1_m_y0 = v_mov_b32 %y12_m_y0 quad_perm:[1,1,1,1] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %y2_m_y0 = v_mov_b32 (kill)%y12_m_y0 quad_perm:[2,2,2,2] bound_ctrl:1 fi
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2025-04-02 14:24:04 +02:00
|
|
|
//>> v2: %_ = image_get_lod (kill)%_, (kill)%_, v1: undef, %wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.nsa_max)
|
|
|
|
|
for (amd_gfx_level lvl : {GFX10, GFX10_3, GFX11}) {
|
|
|
|
|
if (!setup_cs(NULL, lvl))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
PhysReg reg_v0{256};
|
|
|
|
|
PhysReg reg_v6{256 + 6};
|
|
|
|
|
PhysReg reg_v7{256 + 7};
|
|
|
|
|
PhysReg reg_v8{256 + 8};
|
2024-04-23 15:59:57 +02:00
|
|
|
PhysReg reg_s0{0};
|
|
|
|
|
PhysReg reg_s8{8};
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
//>> p_unit_test 0
|
|
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
|
|
|
|
|
|
|
|
|
//~gfx10! v2: %_:v[0-1] = v_lshrrev_b64 0, %_:v[6-7]
|
|
|
|
|
//~gfx10! v1: %_:v[2] = v_mov_b32 %_:v[8]
|
2024-04-23 15:59:57 +02:00
|
|
|
//~gfx10! v4: %_:v[0-3] = image_sample_c_b_o %0:s[0-7], %0:s[8-11], v1: undef, %_:v[0-5] 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
|
2024-04-23 15:59:57 +02:00
|
|
|
//~gfx10_3! v4: %_:v[0-3] = image_sample_c_b_o %0:s[0-7], %0:s[8-11], v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4], %_:v[5] 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
|
2024-04-23 15:59:57 +02:00
|
|
|
//~gfx11! v4: %_:v[0-3] = image_sample_c_b_o %0:s[0-7], %0:s[8-11], v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4-5] 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
Instruction* instr =
|
2024-04-23 15:59:57 +02:00
|
|
|
bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4), Operand(reg_s0, s8),
|
|
|
|
|
Operand(reg_s8, s4), Operand(v1), Operand(reg_v0, v6.as_linear()),
|
|
|
|
|
Operand(reg_v6, v1), Operand(reg_v7, v1), Operand(reg_v8, v1));
|
2023-04-14 17:50:15 +01:00
|
|
|
instr->mimg().dim = ac_image_2darray;
|
|
|
|
|
instr->mimg().da = true;
|
|
|
|
|
instr->mimg().strict_wqm = true;
|
|
|
|
|
|
|
|
|
|
finish_to_hw_instr_test();
|
|
|
|
|
}
|
|
|
|
|
END_TEST
|