2023-04-14 17:50:15 +01:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2023 Valve Corporation
|
|
|
|
|
*
|
2024-04-08 09:02:30 +02:00
|
|
|
* SPDX-License-Identifier: MIT
|
2023-04-14 17:50:15 +01:00
|
|
|
*/
|
|
|
|
|
#include "helpers.h"
|
|
|
|
|
#include "test_d3d11_derivs-spirv.h"
|
|
|
|
|
|
|
|
|
|
using namespace aco;
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.simple)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x
|
|
|
|
|
//>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
2024-01-29 17:54:34 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.constant)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in float in_coord;
|
|
|
|
|
layout(location = 0) out float out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in float in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, vec2(in_coord, -0.5));
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, -0.5
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_mov_b32_e32 v#ry, -0.5 ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> image_sample v[#_:#_], v[#rx:#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.discard)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
if (gl_FragCoord.y > 1.0)
|
|
|
|
|
discard;
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
radv: emit discard as demote by default
Also removes radv_lower_discard_to_demote debug option.
Totals from 1506 (1.90% of 79439) affected shaders: (GFX11)
MaxWaves: 46432 -> 46448 (+0.03%)
Instrs: 664515 -> 667914 (+0.51%); split: -0.15%, +0.67%
CodeSize: 3569656 -> 3583440 (+0.39%); split: -0.12%, +0.51%
VGPRs: 50100 -> 49680 (-0.84%); split: -0.96%, +0.12%
Latency: 4221359 -> 4217875 (-0.08%); split: -0.67%, +0.59%
InvThroughput: 628809 -> 625565 (-0.52%); split: -0.53%, +0.02%
VClause: 9948 -> 9965 (+0.17%); split: -0.36%, +0.53%
SClause: 19656 -> 19695 (+0.20%); split: -0.77%, +0.97%
Copies: 32113 -> 33513 (+4.36%); split: -1.59%, +5.95%
Branches: 8406 -> 8378 (-0.33%)
PreSGPRs: 42328 -> 42555 (+0.54%); split: -0.39%, +0.93%
PreVGPRs: 38451 -> 38203 (-0.64%); split: -0.78%, +0.14%
VALU: 390770 -> 390208 (-0.14%); split: -0.16%, +0.02%
SALU: 43318 -> 46374 (+7.05%); split: -0.08%, +7.14%
VMEM: 15052 -> 15051 (-0.01%)
SMEM: 37225 -> 37215 (-0.03%); split: -0.03%, +0.01%
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27617>
2024-02-06 15:01:23 +01:00
|
|
|
/* The discard gets emitted as demote_if. */
|
2024-12-20 18:16:33 +00:00
|
|
|
//>> s2: %_:exec, s1: (kill)%_:scc = s_wqm_b64 %_
|
|
|
|
|
//! p_exit_early_if_not %_:exec
|
radv: emit discard as demote by default
Also removes radv_lower_discard_to_demote debug option.
Totals from 1506 (1.90% of 79439) affected shaders: (GFX11)
MaxWaves: 46432 -> 46448 (+0.03%)
Instrs: 664515 -> 667914 (+0.51%); split: -0.15%, +0.67%
CodeSize: 3569656 -> 3583440 (+0.39%); split: -0.12%, +0.51%
VGPRs: 50100 -> 49680 (-0.84%); split: -0.96%, +0.12%
Latency: 4221359 -> 4217875 (-0.08%); split: -0.67%, +0.59%
InvThroughput: 628809 -> 625565 (-0.52%); split: -0.53%, +0.02%
VClause: 9948 -> 9965 (+0.17%); split: -0.36%, +0.53%
SClause: 19656 -> 19695 (+0.20%); split: -0.77%, +0.97%
Copies: 32113 -> 33513 (+4.36%); split: -1.59%, +5.95%
Branches: 8406 -> 8378 (-0.33%)
PreSGPRs: 42328 -> 42555 (+0.54%); split: -0.39%, +0.93%
PreVGPRs: 38451 -> 38203 (-0.64%); split: -0.78%, +0.14%
VALU: 390770 -> 390208 (-0.14%); split: -0.16%, +0.02%
SALU: 43318 -> 46374 (+7.05%); split: -0.08%, +7.14%
VMEM: 15052 -> 15051 (-0.01%)
SMEM: 37225 -> 37215 (-0.03%); split: -0.03%, +0.01%
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27617>
2024-02-06 15:01:23 +01:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (kill)%_, (kill)%_ 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.bias)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord, gl_FragCoord.x);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, (latekill)%wqm, (kill)%bias 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
|
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
|
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> image_sample_b v[#_:#_], [v#rb, v#rx, v#ry], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D ; $_ $_ $_
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.offset)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = textureOffset(tex, in_coord, ivec2(1, 2));
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
/* Use GFX9 because we should have at least one test which doesn't use NSA. */
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX9));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
|
|
|
|
//>> v1: %offset = p_parallelcopy 0x201
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample_o (kill)%_, (kill)%_, v1: undef, (latekill)%wqm, (kill)%offset 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
|
|
|
|
//>> BB1:
|
|
|
|
|
//>> v_mov_b32_e32 v#ro_tmp, 0x201 ; $_ $_
|
|
|
|
|
//>> v_mov_b32_e32 v#ro, v#ro_tmp ; $_
|
|
|
|
|
//; success = ro+1 == rx and ro+2 == ry
|
|
|
|
|
//>> image_sample_o v[#_:#_], v[#ro:#rx], s[#_:#_], s[#_:#_] dmask:0xf ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.array)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec3 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2DArray tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %layer = v_rndne_f32 (kill)%_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr (kill)%_, (kill)%_, (kill)%layer
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
|
|
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
|
|
|
|
//; success = rx+1 == ry and rx+2 == rl
|
|
|
|
|
//>> image_sample v[#_:#_], v[#rx:#rl], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.bias_array)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec3 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2DArray tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord, gl_FragCoord.x);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> s2: %_:s[0-1], s1: %_:s[2], s1: %_:s[3], s1: %_:s[4], v2: %_:v[0-1], v1: %bias:v[2] = p_startpgm
|
|
|
|
|
//>> v1: %layer = v_rndne_f32 (kill)%_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv4: %wqm = p_start_linear_vgpr v1: undef, (kill)%_, (kill)%_, (kill)%layer
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample_b (kill)%_, (kill)%_, v1: undef, (latekill)%wqm, (kill)%bias 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.z ; $_
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#ry_tmp, v#_, attr0.y ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
|
|
|
|
//>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
|
|
|
|
//>> image_sample_b v[#_:#_], [v2, v#rx, v#ry, v#rl], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; $_ $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
2023-05-26 19:14:31 +01:00
|
|
|
BEGIN_TEST(d3d11_derivs._1d_gfx9)
|
|
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in float in_coord;
|
|
|
|
|
layout(location = 0) out float out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in float in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler1D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX9));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, 0.5
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v_mov_b32_e32 v#ry, 0.5 ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//; success = rx+1 == ry
|
|
|
|
|
//>> image_sample v[#_:#_], v#rx, s[#_:#_], s[#_:#_] dmask:0xf ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
2023-05-26 19:14:31 +01:00
|
|
|
BEGIN_TEST(d3d11_derivs._1d_array_gfx9)
|
|
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler1DArray tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX9));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %layer = v_rndne_f32 (kill)%_
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.x
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, 0.5, (kill)%layer
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#rl_tmp, v#_, attr0.y ; $_
|
|
|
|
|
//>> v_interp_p2_f32_e32 v#rx_tmp, v#_, attr0.x ; $_
|
|
|
|
|
//>> v_mov_b32_e32 v#ry, 0.5 ; $_
|
2025-01-24 08:42:00 +01:00
|
|
|
//>> v_rndne_f32_e32 v#rl_tmp, v#rl_tmp ; $_
|
2023-10-12 10:52:45 +02:00
|
|
|
//>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_mov_b32_e32 v#rl, v#rl_tmp ; $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
|
|
|
|
//; success = rx+1 == ry and rx+2 == rl
|
|
|
|
|
//>> image_sample v[#_:#_], v#rx, s[#_:#_], s[#_:#_] dmask:0xf da ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.cube)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec3 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec3 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform samplerCube tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
|
|
|
|
|
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
|
|
|
|
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm cube da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
2024-01-29 17:54:34 +00:00
|
|
|
//>> v_cubeid_f32 v#rf_tmp, v#_, v#_, v#_ ; $_ $_
|
|
|
|
|
//>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
|
aco/ra: rework linear VGPR allocation
We allocate them at the end of the register file and keep them separate
from normal VGPRs. This is for two reasons:
- Because we only ever move linear VGPRs into an empty space or a space
previously occupied by a linear one, we never have to swap a normal VGPR
and a linear one. This simplifies copy lowering.
- As linear VGPR's live ranges only start and end on top-level blocks, we
never have to move a linear VGPR in control flow.
fossil-db (navi31):
Totals from 5493 (6.93% of 79242) affected shaders:
MaxWaves: 150365 -> 150343 (-0.01%)
Instrs: 7974740 -> 7976073 (+0.02%); split: -0.06%, +0.08%
CodeSize: 41296024 -> 41299024 (+0.01%); split: -0.06%, +0.06%
VGPRs: 283192 -> 329560 (+16.37%)
Latency: 64267936 -> 64268414 (+0.00%); split: -0.17%, +0.17%
InvThroughput: 10954037 -> 10951735 (-0.02%); split: -0.09%, +0.07%
VClause: 132792 -> 132956 (+0.12%); split: -0.06%, +0.18%
SClause: 223854 -> 223841 (-0.01%); split: -0.01%, +0.01%
Copies: 559574 -> 561395 (+0.33%); split: -0.24%, +0.56%
Branches: 179630 -> 179636 (+0.00%); split: -0.02%, +0.02%
VALU: 4572683 -> 4574487 (+0.04%); split: -0.03%, +0.07%
SALU: 772076 -> 772111 (+0.00%); split: -0.01%, +0.01%
VOPD: 1095 -> 1099 (+0.37%); split: +0.73%, -0.37%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27697>
2024-02-14 19:55:59 +00:00
|
|
|
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
|
|
|
|
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
aco/ra: re-use registers from killed operands
Totals from 77283 (97.34% of 79395) affected shaders: (GFX11)
MaxWaves: 2348498 -> 2348250 (-0.01%); split: +0.01%, -0.02%
Instrs: 45304558 -> 45097367 (-0.46%); split: -0.57%, +0.11%
CodeSize: 235719656 -> 234957768 (-0.32%); split: -0.43%, +0.11%
VGPRs: 3065984 -> 3073244 (+0.24%); split: -0.41%, +0.65%
Latency: 308010576 -> 307008565 (-0.33%); split: -0.85%, +0.52%
InvThroughput: 49560307 -> 49464214 (-0.19%); split: -0.54%, +0.34%
VClause: 881895 -> 879739 (-0.24%); split: -0.78%, +0.53%
SClause: 1388139 -> 1374634 (-0.97%); split: -1.12%, +0.14%
Copies: 2918583 -> 2910434 (-0.28%); split: -1.92%, +1.64%
Branches: 893947 -> 893712 (-0.03%); split: -0.06%, +0.03%
VALU: 25260728 -> 25256766 (-0.02%); split: -0.20%, +0.19%
SALU: 4377750 -> 4373595 (-0.09%); split: -0.17%, +0.07%
VOPD: 8603 -> 9163 (+6.51%); split: +8.54%, -2.03%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29235>
2024-04-19 11:55:28 +02:00
|
|
|
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
|
2023-04-14 17:50:15 +01:00
|
|
|
//; success = rx+1 == ry and rx+2 == rf
|
|
|
|
|
//>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.cube_array)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec4 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec4 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform samplerCubeArray tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %face = v_cubeid_f32 (kill)%_, (kill)%_, (kill)%_
|
|
|
|
|
//>> v1: %x = v_fmaak_f32 (kill)%_, %_, 0x3fc00000
|
|
|
|
|
//>> v1: %y = v_fmaak_f32 (kill)%_, (kill)%_, 0x3fc00000
|
radv: move alu
The stats are decent now that aco has an ILP scheduler
Foz-DB Navi31:
Totals from 73549 (92.59% of 79439) affected shaders:
MaxWaves: 2226952 -> 2229352 (+0.11%); split: +0.21%, -0.10%
Instrs: 44690384 -> 44905884 (+0.48%); split: -0.10%, +0.58%
CodeSize: 232666088 -> 233474808 (+0.35%); split: -0.10%, +0.45%
VGPRs: 2998036 -> 2986936 (-0.37%); split: -0.58%, +0.21%
SpillSGPRs: 7176 -> 7170 (-0.08%); split: -0.53%, +0.45%
SpillVGPRs: 1124 -> 1068 (-4.98%); split: -5.07%, +0.09%
Scratch: 6981632 -> 6977792 (-0.06%)
Latency: 297998345 -> 298541597 (+0.18%); split: -0.35%, +0.53%
InvThroughput: 49162321 -> 49039572 (-0.25%); split: -0.46%, +0.21%
VClause: 881737 -> 884147 (+0.27%); split: -0.35%, +0.62%
SClause: 1371928 -> 1373973 (+0.15%); split: -0.78%, +0.92%
Copies: 2920492 -> 2927281 (+0.23%); split: -0.84%, +1.08%
Branches: 890209 -> 890121 (-0.01%); split: -0.03%, +0.02%
PreSGPRs: 2376670 -> 2377251 (+0.02%); split: -0.25%, +0.28%
PreVGPRs: 2229634 -> 2208966 (-0.93%); split: -1.04%, +0.11%
VALU: 25124040 -> 25127521 (+0.01%); split: -0.07%, +0.08%
SALU: 4343167 -> 4361062 (+0.41%); split: -0.23%, +0.65%
VMEM: 1582363 -> 1582245 (-0.01%); split: -0.01%, +0.00%
VOPD: 8709 -> 8708 (-0.01%); split: +2.35%, -2.37%
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27032>
2024-01-12 11:49:30 +01:00
|
|
|
//>> v1: %layer = v_rndne_f32 (kill)%_
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> v1: %face_layer = v_fmamk_f32 (kill)%layer, (kill)%face, 0x41000000
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv3: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y, (kill)%face_layer
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm cube da
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
|
|
|
|
|
//>> v_cubeid_f32 v#rf, v#_, v#_, v#_ ; $_ $_
|
radv: move alu
The stats are decent now that aco has an ILP scheduler
Foz-DB Navi31:
Totals from 73549 (92.59% of 79439) affected shaders:
MaxWaves: 2226952 -> 2229352 (+0.11%); split: +0.21%, -0.10%
Instrs: 44690384 -> 44905884 (+0.48%); split: -0.10%, +0.58%
CodeSize: 232666088 -> 233474808 (+0.35%); split: -0.10%, +0.45%
VGPRs: 2998036 -> 2986936 (-0.37%); split: -0.58%, +0.21%
SpillSGPRs: 7176 -> 7170 (-0.08%); split: -0.53%, +0.45%
SpillVGPRs: 1124 -> 1068 (-4.98%); split: -5.07%, +0.09%
Scratch: 6981632 -> 6977792 (-0.06%)
Latency: 297998345 -> 298541597 (+0.18%); split: -0.35%, +0.53%
InvThroughput: 49162321 -> 49039572 (-0.25%); split: -0.46%, +0.21%
VClause: 881737 -> 884147 (+0.27%); split: -0.35%, +0.62%
SClause: 1371928 -> 1373973 (+0.15%); split: -0.78%, +0.92%
Copies: 2920492 -> 2927281 (+0.23%); split: -0.84%, +1.08%
Branches: 890209 -> 890121 (-0.01%); split: -0.03%, +0.02%
PreSGPRs: 2376670 -> 2377251 (+0.02%); split: -0.25%, +0.28%
PreVGPRs: 2229634 -> 2208966 (-0.93%); split: -1.04%, +0.11%
VALU: 25124040 -> 25127521 (+0.01%); split: -0.07%, +0.08%
SALU: 4343167 -> 4361062 (+0.41%); split: -0.23%, +0.65%
VMEM: 1582363 -> 1582245 (-0.01%); split: -0.01%, +0.00%
VOPD: 8709 -> 8708 (-0.01%); split: +2.35%, -2.37%
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27032>
2024-01-12 11:49:30 +01:00
|
|
|
|
2024-01-29 17:54:34 +00:00
|
|
|
//>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
|
|
|
|
//>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
|
radv: move alu
The stats are decent now that aco has an ILP scheduler
Foz-DB Navi31:
Totals from 73549 (92.59% of 79439) affected shaders:
MaxWaves: 2226952 -> 2229352 (+0.11%); split: +0.21%, -0.10%
Instrs: 44690384 -> 44905884 (+0.48%); split: -0.10%, +0.58%
CodeSize: 232666088 -> 233474808 (+0.35%); split: -0.10%, +0.45%
VGPRs: 2998036 -> 2986936 (-0.37%); split: -0.58%, +0.21%
SpillSGPRs: 7176 -> 7170 (-0.08%); split: -0.53%, +0.45%
SpillVGPRs: 1124 -> 1068 (-4.98%); split: -5.07%, +0.09%
Scratch: 6981632 -> 6977792 (-0.06%)
Latency: 297998345 -> 298541597 (+0.18%); split: -0.35%, +0.53%
InvThroughput: 49162321 -> 49039572 (-0.25%); split: -0.46%, +0.21%
VClause: 881737 -> 884147 (+0.27%); split: -0.35%, +0.62%
SClause: 1371928 -> 1373973 (+0.15%); split: -0.78%, +0.92%
Copies: 2920492 -> 2927281 (+0.23%); split: -0.84%, +1.08%
Branches: 890209 -> 890121 (-0.01%); split: -0.03%, +0.02%
PreSGPRs: 2376670 -> 2377251 (+0.02%); split: -0.25%, +0.28%
PreVGPRs: 2229634 -> 2208966 (-0.93%); split: -1.04%, +0.11%
VALU: 25124040 -> 25127521 (+0.01%); split: -0.07%, +0.08%
SALU: 4343167 -> 4361062 (+0.41%); split: -0.23%, +0.65%
VMEM: 1582363 -> 1582245 (-0.01%); split: -0.01%, +0.00%
VOPD: 8709 -> 8708 (-0.01%); split: +2.35%, -2.37%
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27032>
2024-01-12 11:49:30 +01:00
|
|
|
//>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_
|
aco/ra: re-use registers from killed operands
Totals from 77283 (97.34% of 79395) affected shaders: (GFX11)
MaxWaves: 2348498 -> 2348250 (-0.01%); split: +0.01%, -0.02%
Instrs: 45304558 -> 45097367 (-0.46%); split: -0.57%, +0.11%
CodeSize: 235719656 -> 234957768 (-0.32%); split: -0.43%, +0.11%
VGPRs: 3065984 -> 3073244 (+0.24%); split: -0.41%, +0.65%
Latency: 308010576 -> 307008565 (-0.33%); split: -0.85%, +0.52%
InvThroughput: 49560307 -> 49464214 (-0.19%); split: -0.54%, +0.34%
VClause: 881895 -> 879739 (-0.24%); split: -0.78%, +0.53%
SClause: 1388139 -> 1374634 (-0.97%); split: -1.12%, +0.14%
Copies: 2918583 -> 2910434 (-0.28%); split: -1.92%, +1.64%
Branches: 893947 -> 893712 (-0.03%); split: -0.06%, +0.03%
VALU: 25260728 -> 25256766 (-0.02%); split: -0.20%, +0.19%
SALU: 4377750 -> 4373595 (-0.09%); split: -0.17%, +0.07%
VOPD: 8603 -> 9163 (+6.51%); split: +8.54%, -2.03%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29235>
2024-04-19 11:55:28 +02:00
|
|
|
//>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
|
radv: move alu
The stats are decent now that aco has an ILP scheduler
Foz-DB Navi31:
Totals from 73549 (92.59% of 79439) affected shaders:
MaxWaves: 2226952 -> 2229352 (+0.11%); split: +0.21%, -0.10%
Instrs: 44690384 -> 44905884 (+0.48%); split: -0.10%, +0.58%
CodeSize: 232666088 -> 233474808 (+0.35%); split: -0.10%, +0.45%
VGPRs: 2998036 -> 2986936 (-0.37%); split: -0.58%, +0.21%
SpillSGPRs: 7176 -> 7170 (-0.08%); split: -0.53%, +0.45%
SpillVGPRs: 1124 -> 1068 (-4.98%); split: -5.07%, +0.09%
Scratch: 6981632 -> 6977792 (-0.06%)
Latency: 297998345 -> 298541597 (+0.18%); split: -0.35%, +0.53%
InvThroughput: 49162321 -> 49039572 (-0.25%); split: -0.46%, +0.21%
VClause: 881737 -> 884147 (+0.27%); split: -0.35%, +0.62%
SClause: 1371928 -> 1373973 (+0.15%); split: -0.78%, +0.92%
Copies: 2920492 -> 2927281 (+0.23%); split: -0.84%, +1.08%
Branches: 890209 -> 890121 (-0.01%); split: -0.03%, +0.02%
PreSGPRs: 2376670 -> 2377251 (+0.02%); split: -0.25%, +0.28%
PreVGPRs: 2229634 -> 2208966 (-0.93%); split: -1.04%, +0.11%
VALU: 25124040 -> 25127521 (+0.01%); split: -0.07%, +0.08%
SALU: 4343167 -> 4361062 (+0.41%); split: -0.23%, +0.65%
VMEM: 1582363 -> 1582245 (-0.01%); split: -0.01%, +0.00%
VOPD: 8709 -> 8708 (-0.01%); split: +2.35%, -2.37%
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27032>
2024-01-12 11:49:30 +01:00
|
|
|
//>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_
|
|
|
|
|
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1:
|
|
|
|
|
//; success = rx+1 == ry and rx+2 == rlf
|
|
|
|
|
//>> image_sample v[#_:#_], v[#rx:#rlf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
2024-07-25 10:14:20 -04:00
|
|
|
BEGIN_TEST(d3d11_derivs.dfdxy)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = vec4(dFdxFine(in_coord.x), dFdyCoarse(in_coord.y), textureLod(tex, vec2(0.5), 0.0).xy);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
/* Must be before BB1 */
|
aco: reorder dpp for ddx/ddy
Having the mov last allows us to fuse it with the use instruction.
Foz-DB Navi31:
Totals from 9400 (11.84% of 79395) affected shaders:
MaxWaves: 273998 -> 274030 (+0.01%)
Instrs: 8303778 -> 8282997 (-0.25%); split: -0.29%, +0.04%
CodeSize: 44428088 -> 44464860 (+0.08%); split: -0.09%, +0.18%
VGPRs: 506616 -> 504492 (-0.42%)
SpillSGPRs: 1389 -> 1393 (+0.29%)
Latency: 76923466 -> 76983332 (+0.08%); split: -0.06%, +0.14%
InvThroughput: 12386888 -> 12391262 (+0.04%); split: -0.04%, +0.07%
VClause: 125136 -> 125059 (-0.06%); split: -0.13%, +0.07%
SClause: 227361 -> 226615 (-0.33%); split: -0.43%, +0.10%
Copies: 440787 -> 440749 (-0.01%); split: -0.17%, +0.16%
PreVGPRs: 339783 -> 333343 (-1.90%); split: -1.92%, +0.02%
VALU: 5088362 -> 5069737 (-0.37%); split: -0.37%, +0.01%
SALU: 606596 -> 606609 (+0.00%); split: -0.01%, +0.01%
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30350>
2024-07-26 20:10:43 +02:00
|
|
|
//>> v1: %_ = v_subrev_f32 (kill)%_, (kill)%_ quad_perm:[0,0,2,2] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %_ = v_subrev_f32 (kill)%_, (kill)%_ quad_perm:[0,0,0,0] bound_ctrl:1 fi
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
|
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
/* Ensure the BC optimize transform is done after ac_nir_lower_tex. */
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.bc_optimize)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec4 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec4(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = texture(tex, vec2(in_coord.x, interpolateAtCentroid(in_coord.y)));
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %y_coord2 = v_cndmask_b32 (kill)%_, %_, (kill)%_
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 (kill)%_, %_:m0, (kill)%_ attr0.x
|
|
|
|
|
//>> v1: %y = v_interp_p2_f32 (kill)%y_coord2, (kill)%_:m0, (kill)%_ attr0.y
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr (kill)%x, (kill)%y
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.get_lod)
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format off
|
2023-04-14 17:50:15 +01:00
|
|
|
QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_coord;
|
|
|
|
|
void main() {
|
|
|
|
|
out_coord = in_coord;
|
|
|
|
|
}
|
|
|
|
|
);
|
|
|
|
|
QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
|
|
|
|
|
layout(location = 0) in vec2 in_coord;
|
|
|
|
|
layout(location = 0) out vec2 out_color;
|
|
|
|
|
layout(binding = 0) uniform sampler2D tex;
|
|
|
|
|
void main() {
|
|
|
|
|
out_color = vec2(0.0);
|
|
|
|
|
if (gl_FragCoord.x > 1.0)
|
|
|
|
|
out_color = textureQueryLod(tex, in_coord);
|
|
|
|
|
}
|
|
|
|
|
);
|
2023-05-26 19:14:31 +01:00
|
|
|
// clang-format on
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
PipelineBuilder pbld(get_vk_device(GFX10_3));
|
|
|
|
|
pbld.add_vsfs(vs, fs);
|
|
|
|
|
|
|
|
|
|
//>> v1: %x = v_interp_p2_f32 %_, %_:m0, (kill)%_ attr0.x
|
|
|
|
|
//>> v1: %y = v_interp_p2_f32 (kill)%_, (kill)%_:m0, (kill)%_ attr0.y
|
2024-02-19 17:00:19 +00:00
|
|
|
//>> lv2: %wqm = p_start_linear_vgpr %x, %y
|
aco: reorder dpp for ddx/ddy
Having the mov last allows us to fuse it with the use instruction.
Foz-DB Navi31:
Totals from 9400 (11.84% of 79395) affected shaders:
MaxWaves: 273998 -> 274030 (+0.01%)
Instrs: 8303778 -> 8282997 (-0.25%); split: -0.29%, +0.04%
CodeSize: 44428088 -> 44464860 (+0.08%); split: -0.09%, +0.18%
VGPRs: 506616 -> 504492 (-0.42%)
SpillSGPRs: 1389 -> 1393 (+0.29%)
Latency: 76923466 -> 76983332 (+0.08%); split: -0.06%, +0.14%
InvThroughput: 12386888 -> 12391262 (+0.04%); split: -0.04%, +0.07%
VClause: 125136 -> 125059 (-0.06%); split: -0.13%, +0.07%
SClause: 227361 -> 226615 (-0.33%); split: -0.43%, +0.10%
Copies: 440787 -> 440749 (-0.01%); split: -0.17%, +0.16%
PreVGPRs: 339783 -> 333343 (-1.90%); split: -1.92%, +0.02%
VALU: 5088362 -> 5069737 (-0.37%); split: -0.37%, +0.01%
SALU: 606596 -> 606609 (+0.00%); split: -0.01%, +0.01%
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30350>
2024-07-26 20:10:43 +02:00
|
|
|
//>> v1: %x12_m_x0 = v_subrev_f32 (kill)%x, (kill)%x quad_perm:[0,0,0,0] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %x1_m_x0 = v_mov_b32 %x12_m_x0 quad_perm:[1,1,1,1] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %x2_m_x0 = v_mov_b32 (kill)%x12_m_x0 quad_perm:[2,2,2,2] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %y12_m_y0 = v_subrev_f32 (kill)%y, (kill)%y quad_perm:[0,0,0,0] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %y1_m_y0 = v_mov_b32 %y12_m_x0 quad_perm:[1,1,1,1] bound_ctrl:1 fi
|
|
|
|
|
//>> v1: %y2_m_y0 = v_mov_b32 (kill)%y12_m_x0 quad_perm:[2,2,2,2] bound_ctrl:1 fi
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB1
|
2024-01-31 18:44:21 +00:00
|
|
|
//>> v2: %_ = image_get_lod (kill)%_, (kill)%_, v1: undef, (latekill)%wqm 2d
|
2023-04-14 17:50:15 +01:00
|
|
|
//>> BB2
|
|
|
|
|
//>> BB6
|
2024-07-25 17:15:15 +02:00
|
|
|
//>> p_end_linear_vgpr (kill)%wqm
|
2023-04-14 17:50:15 +01:00
|
|
|
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
|
|
|
|
END_TEST
|
|
|
|
|
|
|
|
|
|
BEGIN_TEST(d3d11_derivs.nsa_max)
|
|
|
|
|
for (amd_gfx_level lvl : {GFX10, GFX10_3, GFX11}) {
|
|
|
|
|
if (!setup_cs(NULL, lvl))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
PhysReg reg_v0{256};
|
|
|
|
|
PhysReg reg_v6{256 + 6};
|
|
|
|
|
PhysReg reg_v7{256 + 7};
|
|
|
|
|
PhysReg reg_v8{256 + 8};
|
2024-04-23 15:59:57 +02:00
|
|
|
PhysReg reg_s0{0};
|
|
|
|
|
PhysReg reg_s8{8};
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
//>> p_unit_test 0
|
|
|
|
|
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
|
|
|
|
|
|
|
|
|
|
//~gfx10! v2: %_:v[0-1] = v_lshrrev_b64 0, %_:v[6-7]
|
|
|
|
|
//~gfx10! v1: %_:v[2] = v_mov_b32 %_:v[8]
|
2024-04-23 15:59:57 +02:00
|
|
|
//~gfx10! v4: %_:v[0-3] = image_sample_c_b_o %0:s[0-7], %0:s[8-11], v1: undef, %_:v[0-5] 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
|
2024-04-23 15:59:57 +02:00
|
|
|
//~gfx10_3! v4: %_:v[0-3] = image_sample_c_b_o %0:s[0-7], %0:s[8-11], v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4], %_:v[5] 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
|
2024-04-23 15:59:57 +02:00
|
|
|
//~gfx11! v4: %_:v[0-3] = image_sample_c_b_o %0:s[0-7], %0:s[8-11], v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4-5] 2darray da
|
2023-04-14 17:50:15 +01:00
|
|
|
|
|
|
|
|
Instruction* instr =
|
2024-04-23 15:59:57 +02:00
|
|
|
bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4), Operand(reg_s0, s8),
|
|
|
|
|
Operand(reg_s8, s4), Operand(v1), Operand(reg_v0, v6.as_linear()),
|
|
|
|
|
Operand(reg_v6, v1), Operand(reg_v7, v1), Operand(reg_v8, v1));
|
2023-04-14 17:50:15 +01:00
|
|
|
instr->mimg().dim = ac_image_2darray;
|
|
|
|
|
instr->mimg().da = true;
|
|
|
|
|
instr->mimg().strict_wqm = true;
|
|
|
|
|
|
|
|
|
|
finish_to_hw_instr_test();
|
|
|
|
|
}
|
|
|
|
|
END_TEST
|